Przeglądaj źródła

0815 openai音频模型和百度智能云

Qing 8 miesięcy temu
rodzic
commit
798f5dad27

+ 10 - 1
consumer-service-demo/pom.xml

@@ -32,6 +32,7 @@
         <!-- ai相关版本 -->
         <spring-ai.version>1.0.0-M1</spring-ai.version>
         <baidu-ai.version>4.16.19</baidu-ai.version>
+        <mp3spi.version>1.9.5.4</mp3spi.version>
     </properties>
 
     <dependencyManagement>
@@ -77,7 +78,7 @@
                 <version>${freemarker.version}</version>
             </dependency>
 
-            <!-- 提供swagger功能  可以测试接口 -->
+            <!-- 提供swagger功能 -->
             <dependency>
                 <groupId>org.springdoc</groupId>
                 <artifactId>springdoc-openapi-starter-webmvc-api</artifactId>
@@ -96,6 +97,7 @@
                 <version>${lombok.version}</version>
             </dependency>
 
+            <!-- spring ai -->
             <dependency>
                 <groupId>org.springframework.ai</groupId>
                 <artifactId>spring-ai-bom</artifactId>
@@ -110,6 +112,13 @@
                 <artifactId>java-sdk</artifactId>
                 <version>${baidu-ai.version}</version>
             </dependency>
+
+            <!-- 音频格式转换工具 -->
+            <dependency>
+                <groupId>com.googlecode.soundlibs</groupId>
+                <artifactId>mp3spi</artifactId>
+                <version>${mp3spi.version}</version>
+            </dependency>
         </dependencies>
     </dependencyManagement>
 

+ 6 - 5
consumer-service-demo/spring-ai-demo/pom.xml

@@ -27,11 +27,6 @@
 			<artifactId>spring-ai-openai-spring-boot-starter</artifactId>
 		</dependency>
 
-<!--		<dependency>-->
-<!--			<groupId>org.springframework.ai</groupId>-->
-<!--			<artifactId>spring-ai-stability-ai-spring-boot-starter</artifactId>-->
-<!--		</dependency>-->
-
 		<dependency>
 			<groupId>org.projectlombok</groupId>
 			<artifactId>lombok</artifactId>
@@ -43,6 +38,11 @@
 			<artifactId>java-sdk</artifactId>
 		</dependency>
 
+		<dependency>
+			<groupId>com.googlecode.soundlibs</groupId>
+			<artifactId>mp3spi</artifactId>
+		</dependency>
+
 		<dependency>
 			<groupId>org.springframework.boot</groupId>
 			<artifactId>spring-boot-starter-test</artifactId>
@@ -55,6 +55,7 @@
 			<plugin>
 				<groupId>org.springframework.boot</groupId>
 				<artifactId>spring-boot-maven-plugin</artifactId>
+				<version>${spring.boot.version}</version>
 			</plugin>
 		</plugins>
 	</build>

+ 7 - 2
consumer-service-demo/spring-ai-demo/src/main/java/com/sf/ai/controller/video/TTSController.java → consumer-service-demo/spring-ai-demo/src/main/java/com/sf/ai/controller/audio/TTSController.java

@@ -1,4 +1,4 @@
-package com.sf.ai.controller.video;
+package com.sf.ai.controller.audio;
 
 import com.baidu.aip.util.Util;
 import lombok.RequiredArgsConstructor;
@@ -12,6 +12,7 @@ import org.springframework.web.bind.annotation.GetMapping;
 import org.springframework.web.bind.annotation.RequestParam;
 import org.springframework.web.bind.annotation.RestController;
 
+// 文本转语音
 @RestController
 @RequiredArgsConstructor
 public class TTSController {
@@ -22,13 +23,17 @@ public class TTSController {
     @SneakyThrows
     @GetMapping("/tts")
     public String tts(@RequestParam("message") String message) {
+        // 构建调用参数 包括具体模型 模型所需要的参数
         OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
-                .withModel("tts-1")
+//                .withModel("tts-1")
+                .withModel(OpenAiAudioApi.TtsModel.TTS_1.getValue())
                 .withVoice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY) //设置声音
                 .withResponseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
                 .withSpeed(1.0f)
                 .build();
+        // 将message 和 调用参数 组装成prompt
         SpeechPrompt speechPrompt = new SpeechPrompt(message, speechOptions);
+        // 将提示词传给方法 可以调用阻塞式方法call() 也可以调用流式方法stream()
         SpeechResponse response = audioSpeechModel.call(speechPrompt);
         byte[] output = response.getResult().getOutput();
         Util.writeBytesToFileSystem(output,"output.mp3");

+ 44 - 0
consumer-service-demo/spring-ai-demo/src/main/java/com/sf/ai/controller/audio/TranscriptionController.java

@@ -0,0 +1,44 @@
+package com.sf.ai.controller.audio;
+
+import lombok.RequiredArgsConstructor;
+import org.springframework.ai.openai.OpenAiAudioTranscriptionModel;
+import org.springframework.ai.openai.OpenAiAudioTranscriptionOptions;
+import org.springframework.ai.openai.api.OpenAiAudioApi;
+import org.springframework.ai.openai.audio.transcription.AudioTranscriptionPrompt;
+import org.springframework.ai.openai.audio.transcription.AudioTranscriptionResponse;
+import org.springframework.core.io.FileSystemResource;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.io.File;
+
+/**
+ * Speech-to-text demo endpoint backed by the injected OpenAI transcription model.
+ */
+@RestController
+@RequiredArgsConstructor
+public class TranscriptionController {
+
+    private final OpenAiAudioTranscriptionModel audioTranscriptionModel;
+
+    /**
+     * Transcribes the file {@code output.mp3} found in the process working directory
+     * ({@code user.dir}) and returns the transcription output.
+     *
+     * <p>Example: http://localhost:8090/audio2Text
+     *
+     * @return the output of the transcription result
+     */
+    @GetMapping("/audio2Text")
+    public String audio2Text() {
+        // Request parameters: model, response format and temperature
+        // (the original note describes temperature as the model's "creativity").
+        OpenAiAudioTranscriptionOptions transcriptionOptions = OpenAiAudioTranscriptionOptions.builder()
+                .withModel(OpenAiAudioApi.WhisperModel.WHISPER_1.getValue())
+                .withResponseFormat(OpenAiAudioApi.TranscriptResponseFormat.TEXT)
+                .withTemperature(0f)
+                .build();
+
+        // File.separator keeps the path valid on both Unix-like systems and Windows.
+        String audioPath = System.getProperty("user.dir") + File.separator + "output.mp3";
+        FileSystemResource audio = new FileSystemResource(new File(audioPath));
+
+        AudioTranscriptionPrompt transcriptionPrompt = new AudioTranscriptionPrompt(audio, transcriptionOptions);
+        return audioTranscriptionModel.call(transcriptionPrompt).getResult().getOutput();
+    }
+
+}

+ 111 - 0
consumer-service-demo/spring-ai-demo/src/main/java/com/sf/ai/controller/baidu/SampleController.java

@@ -0,0 +1,111 @@
+package com.sf.ai.controller.baidu;
+
+import com.baidu.aip.speech.AipSpeech;
+import com.baidu.aip.speech.TtsResponse;
+import com.baidu.aip.util.Util;
+import com.sf.ai.util.AudioUtils;
+import lombok.SneakyThrows;
+import org.json.JSONObject;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+
+@RestController
+public class SampleController {
+    //设置APPID/AK/SK
+//    public static final String APP_ID = "你的 App ID";
+//    public static final String API_KEY = "你的 Api Key";
+//    public static final String SECRET_KEY = "你的 Secret Key";
+
+    @Value("${baidu.app-id}")
+    private String APP_ID;
+    @Value("${baidu.api-key}")
+    private String API_KEY;
+    @Value("${baidu.secret-key}")
+    private String SECRET_KEY;
+
+    // http://localhost:8090/baidu/tts?message=
+    @GetMapping("/baidu/tts")
+    public String tts(@RequestParam("message") String message) {
+        // 初始化一个AipSpeech
+        AipSpeech client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);
+
+        // 可选:设置网络连接参数
+        client.setConnectionTimeoutInMillis(20000);
+        client.setSocketTimeoutInMillis(60000);
+
+        // 可选:设置代理服务器地址, http和socket二选一,或者均不设置
+//        client.setHttpProxy("proxy_host", proxy_port);  // 设置http代理
+//        client.setSocketProxy("proxy_host", proxy_port);  // 设置socket代理
+
+        // 可选:设置log4j日志输出格式,若不设置,则使用默认配置
+        // 也可以直接通过jvm启动参数设置此环境变量
+//        System.setProperty("aip.log4j.conf", "path/to/your/log4j.properties");
+
+        // 调用接口
+        HashMap<String, Object> options = new HashMap<String, Object>();
+        options.put("spd", "5");
+        options.put("pit", "5");
+        options.put("per", "4");
+        TtsResponse res = client.synthesis(message, "zh", 1, options);
+        byte[] data = res.getData();
+        JSONObject res1 = res.getResult();
+        if (data != null) {
+            try {
+                Util.writeBytesToFileSystem(data, "baiduOutput.mp3");
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+        if (res1 != null) {
+            System.out.println(res1.toString(2));
+        }
+
+        return "success";
+    }
+
+
+    // http://localhost:8090/baidu/audio2Text
+    @SneakyThrows
+    @GetMapping("/baidu/audio2Text")
+    public String audio2Text() {
+        // 初始化一个AipSpeech
+        AipSpeech client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);
+
+        // 可选:设置网络连接参数
+        client.setConnectionTimeoutInMillis(20000);
+        client.setSocketTimeoutInMillis(60000);
+
+        // 可选:设置代理服务器地址, http和socket二选一,或者均不设置
+//        client.setHttpProxy("proxy_host", proxy_port);  // 设置http代理
+//        client.setSocketProxy("proxy_host", proxy_port);  // 设置socket代理
+
+        // 可选:设置log4j日志输出格式,若不设置,则使用默认配置
+        // 也可以直接通过jvm启动参数设置此环境变量
+//        System.setProperty("aip.log4j.conf", "path/to/your/log4j.properties");
+
+        // 项目根路径
+        String path = System.getProperty("user.dir");
+        // File.separator 文件的分隔符  mac /  windows \\
+        String mp3FileName = path + File.separator + "baiduOutput.mp3";
+        String pcmFileName = path + File.separator + "baiduOutput.pcm";
+        AudioUtils.mp3ToPcm(mp3FileName, pcmFileName);
+
+        // 调用接口
+//        JSONObject res = client.asr(pcmFileName, "pcm", 16000, null);
+//        System.out.println(res.toString(2));
+
+
+        HashMap<String, Object> options = new HashMap<>();
+        options.put("dev_pid","1737");
+        byte[] data = Util.readFileByBytes(mp3FileName);     //readFileByBytes仅为获取二进制数据示例
+        JSONObject asrRes2 = client.asr(data, "pcm", 16000, options);
+        System.out.println(asrRes2);
+        return "success";
+    }
+}

+ 3 - 2
consumer-service-demo/spring-ai-demo/src/main/java/com/sf/ai/controller/image/ImageController.java

@@ -33,8 +33,9 @@ public class ImageController {
         // 构造画图提示词
         ImagePrompt imagePrompt = new ImagePrompt(prompt, imageOptions);
         ImageResponse imageResponse = openAiImageModel.call(imagePrompt);
-        List<ImageGeneration> results = imageResponse.getResults();
-        Image image = results.get(0).getOutput();
+//        List<ImageGeneration> results = imageResponse.getResults();
+//        Image image = results.get(0).getOutput();
+        Image image = imageResponse.getResult().getOutput();
         String url = image.getUrl();
         return String.format("<img src='%s' alt='%s'>", url, prompt);
     }

+ 2 - 0
consumer-service-demo/spring-ai-demo/src/main/java/com/sf/ai/controller/multi/MultiController.java

@@ -6,6 +6,7 @@ import org.springframework.web.bind.annotation.GetMapping;
 import org.springframework.web.bind.annotation.RequestParam;
 import org.springframework.web.bind.annotation.RestController;
 
+// 多模态大模型
 @RestController
 @RequiredArgsConstructor
 public class MultiController {
@@ -16,6 +17,7 @@ public class MultiController {
     // http://localhost:8090/multi?prompt=需要一张英短猫的图片
     @GetMapping("/multi")
     public String multi(@RequestParam("prompt") String prompt) {
+        // AI自查 判断信息的需求是文本还是图片 根据判断结果进行分发
         Boolean judge = multiService.judge(prompt);
         if (judge) {
             return multiService.image(prompt);

+ 56 - 0
consumer-service-demo/spring-ai-demo/src/main/java/com/sf/ai/util/AudioUtils.java

@@ -0,0 +1,56 @@
+package com.sf.ai.util;
+
+import javazoom.spi.mpeg.sampled.file.MpegAudioFileReader;
+
+import javax.sound.sampled.AudioFileFormat;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+import java.io.File;
+import java.io.IOException;
+
+public class AudioUtils {
+
+    // 接收源文件和目标文件的地址
+    // 这里使用到了java中的IO 输入输出
+    public static boolean mp3ToPcm(String mp3filepath, String pcmfilepath) {
+        try {
+            //获取文件的音频流,pcm的格式
+            AudioInputStream audioInputStream = getPcmAudioInputStream(mp3filepath);
+            //将音频转化为  pcm的格式保存下来
+            AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, new File(pcmfilepath));
+            return true;
+        } catch (IOException e) {
+            // TODO Auto-generated catch block
+            e.printStackTrace();
+            return false;
+        }
+    }
+
+    /**
+     * 获取MP3音频流
+     *
+     * @param mp3filepath
+     * @return
+     */
+    private static AudioInputStream getPcmAudioInputStream(String mp3filepath) {
+        File mp3 = new File(mp3filepath);
+        AudioInputStream audioInputStream = null;
+        AudioFormat targetFormat = null;
+        try {
+            AudioInputStream in = null;
+            //读取音频文件的类
+            MpegAudioFileReader mp = new MpegAudioFileReader();
+            in = mp.getAudioInputStream(mp3);
+            AudioFormat baseFormat = in.getFormat();
+            //设定输出格式为pcm格式的音频文件
+            targetFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, baseFormat.getSampleRate(), 16,
+                                           baseFormat.getChannels(), baseFormat.getChannels() * 2, baseFormat.getSampleRate(), false);
+            //输出到音频
+            audioInputStream = AudioSystem.getAudioInputStream(targetFormat, in);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return audioInputStream;
+    }
+}

+ 9 - 6
consumer-service-demo/spring-ai-demo/src/main/resources/application.properties

@@ -1,10 +1,13 @@
-spring.application.name=spring-ai-demo
-
 server.port=8090
-
-# \u8FDE\u63A5openai\u7684\u5730\u5740
+spring.application.name=spring-ai-demo
+# open ai \u4EE3\u7406\u7F51\u5740
 spring.ai.openai.base-url=https://api.xty.app
-# \u8FDE\u63A5openai\u7684token
+# \u7F51\u5740 token
 spring.ai.openai.api-key=sk-aLTR8cque07DSSqVA072596f1d2e4365Ad8e27B733AaD12b
-# \u8FDE\u63A5openai\u7684\u6A21\u578B
+# \u6307\u5B9A\u6A21\u578B
 #spring.ai.openai.chat.options.model=
+
+# \u767E\u5EA6\u667A\u80FD\u4E91 token
+baidu.app-id=92186539
+baidu.api-key=ADNygQQHZIL2gdP65iTPbsVr
+baidu.secret-key=GbnfopuRdqWckID4ZUAdBkwRadNanDNK

+ 6 - 0
consumer-service-demo/spring-ai-demo/src/test/java/com/sf/ai/ListTests.java

@@ -20,4 +20,10 @@ public class ListTests {
         List subList = list.subList(2, 6);
         System.out.println(subList);
     }
+
+    /** Prints the value of the {@code user.dir} system property. */
+    @Test
+    public void test2() {
+        System.out.println(System.getProperty("user.dir"));
+    }
 }