-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Open
Labels
Description
Describe the bug
我尝试在 Windows 11 上运行 CosyVoice，并写了一些代码尝试进行 TTS，结果出现 ModuleNotFoundError: No module named 'ttsfrd' 错误。按照文档的说法，应该可以不安装这个库：ttsfrd 不可用时会改用 WeTextProcessing。但我在代码里没有找到任何与 WeTextProcessing 相关的代码。请问怎样才能在 Windows 11 上运行 CosyVoice？
To Reproduce
运行下列脚本
import re
import torch
import os
from http import HTTPStatus
from typing import List, Optional, Tuple, Dict
from uuid import uuid4
import sys
sys.path.insert(1, "../cosyvoice")
sys.path.insert(1, "../sensevoice")
sys.path.insert(1, "../cosyvoice/third_party/AcademiCodec")
sys.path.insert(1, "../cosyvoice/third_party/Matcha-TTS")
sys.path.insert(1, "../")
from cosyvoice.cli.cosyvoice import CosyVoice
from cosyvoice.utils.file_utils import load_wav
import librosa
import torchaudio
# Sample rates: prompt recordings are loaded at prompt_sr; target_sr is the
# rate reported for synthesized audio and used to size the trailing padding.
prompt_sr = 16000
target_sr = 22050

# Build the CosyVoice object from the Instruct checkpoint directory.
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-Instruct')

# Transcript of the recorded prompt audio.
prompt_text = '希望你以后能够做的比我还好呦。'

# Peak-amplitude ceiling enforced by postprocess().
max_val = 0.8
def postprocess(speech, top_db=60, hop_length=220, win_length=440):
    """Trim, peak-limit and pad a speech waveform.

    The waveform is passed through ``librosa.effects.trim``, scaled down so
    its absolute peak does not exceed the module-level ``max_val``, and then
    extended with ``int(target_sr * 0.2)`` zero samples.

    Args:
        speech: Waveform tensor. It is concatenated with a ``(1, N)`` zero
            tensor along dim 1, so it is expected to be 2-D with a leading
            dimension of 1.
        top_db: Forwarded to ``librosa.effects.trim`` as ``top_db``.
        hop_length: Forwarded to ``librosa.effects.trim`` as ``hop_length``.
        win_length: Forwarded to ``librosa.effects.trim`` as ``frame_length``.

    Returns:
        The trimmed, peak-limited waveform with the zero padding appended.
    """
    # The second value returned by trim() is not needed here.
    trimmed, _ = librosa.effects.trim(
        speech,
        top_db=top_db,
        frame_length=win_length,
        hop_length=hop_length,
    )

    # Rescale only when the signal is louder than the allowed ceiling.
    peak = trimmed.abs().max()
    if peak > max_val:
        trimmed = trimmed / peak * max_val

    tail = torch.zeros(1, int(target_sr * 0.2))
    return torch.concat([trimmed, tail], dim=1)
# Prompt waveform per doctor name. Each recording is loaded at prompt_sr and
# passed through postprocess() once, when this statement runs.
DOCTOR_TIMBRES_DICT = {
    doctor_name: postprocess(load_wav(wav_path, prompt_sr))
    for doctor_name, wav_path in (
        ("毛医生", '../3.wav'),
        ("向医生", '../2.wav'),
    )
}

# Module-level list for synthesized waveforms; starts out empty.
tts_speeches = []
def text_to_speech_v2(text, doctor):
    """Synthesize ``text`` using the prompt voice registered for ``doctor``.

    When ``text`` matches ``生成风格:<style>;播报内容:<content>``, the two
    captured parts are stripped and joined as
    ``<style><endofprompt><content>`` before synthesis. Otherwise ``text`` is
    synthesized unchanged and "No match found" is printed.

    Args:
        text: Input text, optionally carrying the style/content markers.
        doctor: Key into ``DOCTOR_TIMBRES_DICT`` selecting the prompt waveform.

    Returns:
        A ``(target_sr, waveform)`` tuple, where ``waveform`` is the
        ``'tts_speech'`` entry of the model output for this call only.

    Raises:
        KeyError: If ``doctor`` is not a key of ``DOCTOR_TIMBRES_DICT``.
    """
    pattern = r"生成风格:\s*([^;]+);播报内容:\s*(.+)"
    match = re.search(pattern, text)
    if match:
        style = match.group(1).strip()
        content = match.group(2).strip()
        tts_text = f"{style}<endofprompt>{content}"
        print(f"生成风格: {style}")
        print(f"播报内容: {content}")
    else:
        print("No match found")
        tts_text = text

    model_input = cosyvoice.frontend.frontend_zero_shot(
        tts_text, prompt_text, DOCTOR_TIMBRES_DICT[doctor]
    )
    model_output = cosyvoice.model.inference(**model_input)

    # Return only the audio produced by this call. Collecting outputs in a
    # shared module-level list and concatenating it would make every later
    # call return all earlier utterances too, and grow memory without bound.
    return target_sr, model_output['tts_speech']
# Output path for the synthesized audio.
file_name = "2025213.wav"
print(f"file path is:{file_name}")

# Text to synthesize, spoken with the "向医生" prompt voice.
t = "啊啊啊我好烦啊,头又疼肚子又痒脑壳又痛,脑壳发昏脚又伤,谁来救救可怜的我啊,我请他喝肥宅快乐水啊啊啊啊啊,嘤嘤嘤"
sample_rate, speech_data = text_to_speech_v2(t, "向医生")

# Write the synthesized waveform to file_name.
torchaudio.save(file_name, speech_data, sample_rate)
Expected behavior
正常生成语音
Screenshots
Desktop (please complete the following information):
- OS: Windows 11
Additional context
error log:
PS D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP> d:; cd 'd:\repo\funaudiollm-app-2025214\FunAudioLLM-APP'; & 'c:\Users\xjv11\.conda\envs\funaudiollm_app\python.exe' 'c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\launcher' '51322' '--' 'D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP\cosyvoice\demo.py'
c:\Users\xjv11\.conda\envs\funaudiollm_app\Lib\site-packages\onnxruntime\capi\onnxruntime_validation.py:26: UserWarning: Unsupported Windows version (11). ONNX Runtime supports Windows 10 and above, only.
warnings.warn(
Traceback (most recent call last):
File "c:\Users\xjv11\.conda\envs\funaudiollm_app\Lib\runpy.py", line 198, in _run_module_as_main
return _run_code(code, main_globals, None,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\xjv11\.conda\envs\funaudiollm_app\Lib\runpy.py", line 88, in _run_code
exec(code, run_globals)
File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\launcher/../..\debugpy\__main__.py", line 71, in <module>
cli.main()
File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 501, in main
run()
File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 351, in run_file
runpy.run_path(target, run_name="__main__")
File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 310, in run_path
return _run_module_code(code, init_globals, run_name, pkg_name=pkg_name, script_name=fname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 127, in _run_module_code
_run_code(code, mod_globals, init_globals, mod_name, mod_spec, pkg_name, script_name)
File "c:\Users\xjv11\.vscode\extensions\ms-python.debugpy-2025.0.1-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 118, in _run_code
exec(code, run_globals)
File "D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP\cosyvoice\demo.py", line 14, in <module>
from cosyvoice.cli.cosyvoice import CosyVoice
File "D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP\cosyvoice\cosyvoice\cli\cosyvoice.py", line 18, in <module>
from cosyvoice.cli.frontend import CosyVoiceFrontEnd
File "D:\repo\funaudiollm-app-2025214\FunAudioLLM-APP\cosyvoice\cosyvoice\cli\frontend.py", line 24, in <module>
import ttsfrd
ModuleNotFoundError: No module named 'ttsfrd'
