Skip to content

Commit cfabc33

Browse files
authored
Merge pull request #98 from vritser/main
fix(audio): merge audio files
2 parents ab68a10 + 9d56bc1 commit cfabc33

File tree

2 files changed

+41
-49
lines changed

2 files changed

+41
-49
lines changed

src/main/java/com/xiaozhi/dialogue/service/DialogueService.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -715,7 +715,7 @@ private void saveAssistantResponse(ChatSession session) {
715715

716716
// 合并音频文件
717717
if (!audioFilesToMerge.isEmpty()) {
718-
String mergedAudioPath = AudioUtils.AUDIO_PATH + AudioUtils.mergeWavFiles(audioFilesToMerge);
718+
String mergedAudioPath = AudioUtils.AUDIO_PATH + AudioUtils.mergeAudioFiles(audioFilesToMerge);
719719

720720
// 保存合并后的音频路径
721721
session.setAssistantAudioPath(mergedAudioPath);

src/main/java/com/xiaozhi/utils/AudioUtils.java

Lines changed: 40 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import java.io.InputStream;
1111
import java.nio.file.Files;
1212
import java.nio.file.Paths;
13+
import java.util.ArrayList;
1314
import java.util.List;
1415
import java.util.UUID;
1516

@@ -25,7 +26,7 @@ public class AudioUtils {
2526

2627
/**
2728
* 将原始音频数据保存为MP3文件
28-
*
29+
*
2930
* @param audio PCM音频数据
3031
* @return 文件名
3132
*/
@@ -102,9 +103,9 @@ public static String saveAsMp3(byte[] audio) {
102103
}
103104
}
104105

105-
/**
106+
/**
106107
* 将原始音频数据保存为WAV文件
107-
*
108+
*
108109
* @param audioData 音频数据
109110
* @return 文件名
110111
*/
@@ -118,9 +119,9 @@ public static String saveAsWav(byte[] audioData) {
118119
try {
119120
// 确保音频目录存在
120121
Files.createDirectories(Paths.get(AUDIO_PATH));
121-
122+
122123
try (FileOutputStream fos = new FileOutputStream(filePath);
123-
DataOutputStream dos = new DataOutputStream(fos)) {
124+
DataOutputStream dos = new DataOutputStream(fos)) {
124125

125126
// 写入WAV文件头
126127
// RIFF头
@@ -155,50 +156,43 @@ public static String saveAsWav(byte[] audioData) {
155156
}
156157

157158
/**
158-
* 合并多个WAV文件为一个WAV文件
159-
*
160-
* @param wavPaths 要合并的WAV文件路径列表
159+
* 合并多个音频文件为一个WAV文件
160+
* 支持合并的格式: wav, mp3, pcm
161+
*
162+
* @param audioPaths 要合并的音频文件路径列表
161163
* @return 合并后的WAV文件名
162164
*/
163-
public static String mergeWavFiles(List<String> wavPaths) {
164-
165-
if (wavPaths.size() == 1) {
166-
// 如果只有一个文件,直接返回该文件名
167-
String singlePath = wavPaths.get(0);
168-
if (singlePath.startsWith(AUDIO_PATH)) {
169-
return singlePath.substring(AUDIO_PATH.length());
170-
}
171-
return singlePath;
165+
public static String mergeAudioFiles(List<String> audioPaths) {
166+
if (audioPaths.size() == 1) {
167+
return Paths.get(audioPaths.getFirst()).getFileName().toString();
172168
}
173-
174-
String uuid = UUID.randomUUID().toString().replace("-", "");
175-
String outputFileName = uuid + ".wav";
176-
String outputPath = AUDIO_PATH + outputFileName;
177-
169+
var uuid = UUID.randomUUID().toString().replace("-", "");
170+
var outputFileName = uuid + ".wav";
171+
var outputPath = Paths.get(AUDIO_PATH, outputFileName).toString();
172+
178173
try {
179-
// 确保音频目录存在
180-
Files.createDirectories(Paths.get(AUDIO_PATH));
181-
182174
// 计算所有PCM数据的总大小
183-
long totalPcmSize = 0;
184-
for (String wavPath : wavPaths) {
185-
String fullPath = wavPath.startsWith(AUDIO_PATH) ? wavPath : AUDIO_PATH + wavPath;
186-
byte[] pcmData = wavToPcm(fullPath);
175+
var totalPcmSize = 0L;
176+
var audioChunks = new ArrayList<byte[]>();
177+
for (var audioPath : audioPaths) {
178+
var fullPath = audioPath.startsWith(AUDIO_PATH) ? audioPath : AUDIO_PATH + audioPath;
179+
byte[] pcmData = readAsPcm(fullPath);
187180
totalPcmSize += pcmData.length;
181+
audioChunks.add(pcmData);
188182
}
189-
183+
190184
// 创建输出WAV文件
191185
try (FileOutputStream fos = new FileOutputStream(outputPath);
192-
DataOutputStream dos = new DataOutputStream(fos)) {
193-
186+
DataOutputStream dos = new DataOutputStream(fos)) {
187+
194188
// 写入WAV文件头
195189
int bitsPerSample = 16; // 16位采样
196-
190+
197191
// RIFF头
198192
dos.writeBytes("RIFF");
199-
dos.writeInt(Integer.reverseBytes(36 + (int)totalPcmSize)); // 文件长度
193+
dos.writeInt(Integer.reverseBytes(36 + (int) totalPcmSize)); // 文件长度
200194
dos.writeBytes("WAVE");
201-
195+
202196
// fmt子块
203197
dos.writeBytes("fmt ");
204198
dos.writeInt(Integer.reverseBytes(16)); // 子块大小
@@ -208,29 +202,27 @@ public static String mergeWavFiles(List<String> wavPaths) {
208202
dos.writeInt(Integer.reverseBytes(SAMPLE_RATE * CHANNELS * bitsPerSample / 8)); // 字节率
209203
dos.writeShort(Short.reverseBytes((short) (CHANNELS * bitsPerSample / 8))); // 块对齐
210204
dos.writeShort(Short.reverseBytes((short) bitsPerSample)); // 每个样本的位数
211-
205+
212206
// data子块
213207
dos.writeBytes("data");
214-
dos.writeInt(Integer.reverseBytes((int)totalPcmSize)); // 数据大小
215-
208+
dos.writeInt(Integer.reverseBytes((int) totalPcmSize)); // 数据大小
209+
216210
// 依次写入每个文件的PCM数据
217-
for (String wavPath : wavPaths) {
218-
String fullPath = wavPath.startsWith(AUDIO_PATH) ? wavPath : AUDIO_PATH + wavPath;
219-
byte[] pcmData = wavToPcm(fullPath);
211+
for (var pcmData : audioChunks) {
220212
dos.write(pcmData);
221213
}
222214
}
223-
215+
224216
return outputFileName;
225217
} catch (Exception e) {
226-
logger.error("合并WAV文件时发生错误", e);
218+
logger.error("合并音频文件时发生错误", e);
227219
return null;
228220
}
229221
}
230222

231223
/**
232224
* 从WAV文件中提取PCM数据
233-
*
225+
*
234226
* @param wavPath WAV文件路径
235227
* @return PCM数据字节数组
236228
*/
@@ -242,7 +234,7 @@ public static byte[] wavToPcm(String wavPath) throws IOException {
242234

243235
/**
244236
* 从WAV字节数据中提取PCM数据
245-
*
237+
*
246238
* @param wavData WAV文件的字节数据
247239
* @return PCM数据字节数组
248240
*/
@@ -282,7 +274,7 @@ public static byte[] wavBytesToPcm(byte[] wavData) throws IOException {
282274

283275
/**
284276
* 从文件读取PCM数据,自动处理WAV和MP3格式
285-
*
277+
*
286278
* @param filePath 音频文件路径
287279
* @return PCM数据字节数组
288280
*/
@@ -301,7 +293,7 @@ public static byte[] readAsPcm(String filePath) throws IOException {
301293

302294
/**
303295
* 将MP3转换为PCM格式
304-
*
296+
*
305297
* @param mp3Path MP3文件路径
306298
* @return PCM数据字节数组
307299
*/
@@ -356,7 +348,7 @@ public static byte[] mp3ToPcm(String mp3Path) throws IOException {
356348

357349
/**
358350
* 检测音频文件格式并返回MIME类型
359-
*
351+
*
360352
* @param filePath 音频文件路径
361353
* @return MIME类型字符串
362354
*/

0 commit comments

Comments
 (0)