diff --git a/main/xiaozhi-server/core/providers/llm/gemini/gemini.py b/main/xiaozhi-server/core/providers/llm/gemini/gemini.py index a91a6bf00..e4794b682 100644 --- a/main/xiaozhi-server/core/providers/llm/gemini/gemini.py +++ b/main/xiaozhi-server/core/providers/llm/gemini/gemini.py @@ -25,8 +25,7 @@ def __init__(self, config): # 初始化Gemini客户端 # 配置代理(如果提供了代理配置) self.proxies = None - if self.http_proxy is not "" or self.https_proxy is not "": - + if self.http_proxy or self.https_proxy: self.proxies = { "http": self.http_proxy, "https": self.https_proxy, diff --git a/main/xiaozhi-server/core/providers/tts/base.py b/main/xiaozhi-server/core/providers/tts/base.py index f2632c02a..b994f52b6 100644 --- a/main/xiaozhi-server/core/providers/tts/base.py +++ b/main/xiaozhi-server/core/providers/tts/base.py @@ -2,8 +2,8 @@ from config.logger import setup_logging import os from abc import ABC, abstractmethod -from core.utils.tts import MarkdownCleaner -from core.utils.util import audio_to_data +from core.utils.util import audio_to_opus_data +from core.utils.tts import TextFormater TAG = __name__ logger = setup_logging() @@ -22,7 +22,10 @@ def to_tts(self, text): tmp_file = self.generate_filename() try: max_repeat_time = 5 - text = MarkdownCleaner.clean_markdown(text) + # 判断清理markdown标记并且判断文本是否是关键字,否则不生成tts + text = TextFormater.format_text(text) + if text is None or text == "": + return None while not os.path.exists(tmp_file) and max_repeat_time > 0: try: asyncio.run(self.text_to_speak(text, tmp_file)) diff --git a/main/xiaozhi-server/core/utils/tts.py b/main/xiaozhi-server/core/utils/tts.py index eb3836672..f258e6694 100644 --- a/main/xiaozhi-server/core/utils/tts.py +++ b/main/xiaozhi-server/core/utils/tts.py @@ -18,12 +18,22 @@ def create_instance(class_name, *args, **kwargs): raise ValueError(f"不支持的TTS类型: {class_name},请检查该配置的type是否设置正确") -class MarkdownCleaner: +class TextFormater: """ - 封装 Markdown 清理逻辑:直接用 MarkdownCleaner.clean_markdown(text) 即可 + 文本格式化类,用于封装Markdown清理逻辑,直接用 TextFormater.format_text(text)即可 """ # 公式字符 - NORMAL_FORMULA_CHARS = re.compile(r'[a-zA-Z\\^_{}\+\-\(\)\[\]=]') + __NORMAL_FORMULA_CHARS = re.compile(r'[a-zA-Z\\^_{}\+\-\(\)\[\]=]') + # 需要排除的关键字列表 + __EXCLUDED_KEYWORDS = {'', ''} + + @classmethod + def NORMAL_FORMULA_CHARS(cls): + return cls.__NORMAL_FORMULA_CHARS + + @classmethod + def EXCLUDED_KEYWORDS(cls): + return cls.__EXCLUDED_KEYWORDS @staticmethod def _replace_inline_dollar(m: re.Match) -> str: @@ -33,7 +43,7 @@ def _replace_inline_dollar(m: re.Match) -> str: - 否则 (纯数字/货币等) => 保留 "$...$" """ content = m.group(1) - if MarkdownCleaner.NORMAL_FORMULA_CHARS.search(content): + if TextFormater.NORMAL_FORMULA_CHARS().search(content): return content else: return m.group(0) @@ -79,8 +89,7 @@ def _replace_table_block(match: re.Match) -> str: return "\n".join(lines_for_tts) + "\n" - # 预编译所有正则表达式(按执行频率排序) - # 这里要把 replace_xxx 的静态方法放在最前定义,以便在列表里能正确引用它们。 + # 预编译所有markdown正则表达式(按执行频率排序) REGEXES = [ (re.compile(r'```.*?```', re.DOTALL), ''), # 代码块 (re.compile(r'^#+\s*', re.MULTILINE), ''), # 标题 @@ -100,13 +109,26 @@ def _replace_table_block(match: re.Match) -> str: _replace_inline_dollar ), (re.compile(r'\n{2,}'), '\n'), # 多余空行 + # 排除标签 + (re.compile(r'.*?', re.DOTALL), ''), ] @staticmethod - def clean_markdown(text: str) -> str: + def _clean_markdown(text: str) -> str: """ - 主入口方法:依序执行所有正则,移除或替换 Markdown 元素 + 依序执行所有正则,移除或替换 Markdown 元素 """ - for regex, replacement in MarkdownCleaner.REGEXES: + for regex, replacement in TextFormater.REGEXES: text = regex.sub(replacement, text) - return text.strip() \ No newline at end of file + return text.strip() + + @staticmethod + def format_text(text: str) -> str | None: + """ + 格式化文本,清理markdown标记 + :param text: 待格式化的文本 + :return: 格式化后的文本,如果文本为空或包含关键字,则返回None, 上层逻辑需要处理None的情况 + """ + if not text or text in TextFormater.EXCLUDED_KEYWORDS(): + return None + return TextFormater._clean_markdown(text)