From 98a71b38ec2aa5712a46020672f44250d688fda5 Mon Sep 17 00:00:00 2001 From: caixypromise Date: Wed, 7 May 2025 23:22:02 +0800 Subject: [PATCH 1/2] =?UTF-8?q?update:=20=E5=AF=B9=E5=A4=A7=E8=AF=AD?= =?UTF-8?q?=E8=A8=80=E6=A8=A1=E5=9E=8B=E5=AF=B9=E8=AF=9D=E5=9B=9E=E5=A4=8D?= =?UTF-8?q?=E5=8E=BB=E6=8E=89think=E3=80=82=20issue(#1050)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../xiaozhi-server/core/providers/tts/base.py | 6 ++- main/xiaozhi-server/core/utils/tts.py | 42 ++++++++++++++----- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/main/xiaozhi-server/core/providers/tts/base.py b/main/xiaozhi-server/core/providers/tts/base.py index 4cf37edb6..c2d4e85cb 100644 --- a/main/xiaozhi-server/core/providers/tts/base.py +++ b/main/xiaozhi-server/core/providers/tts/base.py @@ -4,6 +4,7 @@ from abc import ABC, abstractmethod from core.utils.tts import MarkdownCleaner from core.utils.util import audio_to_opus_data +from core.utils.tts import TextFormater TAG = __name__ logger = setup_logging() @@ -22,7 +23,10 @@ def to_tts(self, text): tmp_file = self.generate_filename() try: max_repeat_time = 5 - text = MarkdownCleaner.clean_markdown(text) + # 判断清理markdown标记并且判断文本是否是关键字,否则不生成tts + text = TextFormater.format_text(text) + if text is None or text == "": + return None while not os.path.exists(tmp_file) and max_repeat_time > 0: try: asyncio.run(self.text_to_speak(text, tmp_file)) diff --git a/main/xiaozhi-server/core/utils/tts.py b/main/xiaozhi-server/core/utils/tts.py index eb3836672..f258e6694 100644 --- a/main/xiaozhi-server/core/utils/tts.py +++ b/main/xiaozhi-server/core/utils/tts.py @@ -18,12 +18,22 @@ def create_instance(class_name, *args, **kwargs): raise ValueError(f"不支持的TTS类型: {class_name},请检查该配置的type是否设置正确") -class MarkdownCleaner: +class TextFormater: """ - 封装 Markdown 清理逻辑:直接用 MarkdownCleaner.clean_markdown(text) 即可 + 文本格式化类,用于封装Markdown清理逻辑,直接用 TextFormater.format_text(text)即可 """ # 公式字符 - 
NORMAL_FORMULA_CHARS = re.compile(r'[a-zA-Z\\^_{}\+\-\(\)\[\]=]') + __NORMAL_FORMULA_CHARS = re.compile(r'[a-zA-Z\\^_{}\+\-\(\)\[\]=]') + # 需要排除的关键字列表 + __EXCLUDED_KEYWORDS = {'<think>', '</think>'} + + @classmethod + def NORMAL_FORMULA_CHARS(cls): + return cls.__NORMAL_FORMULA_CHARS + + @classmethod + def EXCLUDED_KEYWORDS(cls): + return cls.__EXCLUDED_KEYWORDS @staticmethod def _replace_inline_dollar(m: re.Match) -> str: @@ -33,7 +43,7 @@ def _replace_inline_dollar(m: re.Match) -> str: - 否则 (纯数字/货币等) => 保留 "$...$" """ content = m.group(1) - if MarkdownCleaner.NORMAL_FORMULA_CHARS.search(content): + if TextFormater.NORMAL_FORMULA_CHARS().search(content): return content else: return m.group(0) @@ -79,8 +89,7 @@ def _replace_table_block(match: re.Match) -> str: return "\n".join(lines_for_tts) + "\n" - # 预编译所有正则表达式(按执行频率排序) - # 这里要把 replace_xxx 的静态方法放在最前定义,以便在列表里能正确引用它们。 + # 预编译所有markdown正则表达式(按执行频率排序) REGEXES = [ (re.compile(r'```.*?```', re.DOTALL), ''), # 代码块 (re.compile(r'^#+\s*', re.MULTILINE), ''), # 标题 @@ -100,13 +109,26 @@ def _replace_table_block(match: re.Match) -> str: _replace_inline_dollar ), (re.compile(r'\n{2,}'), '\n'), # 多余空行 + # 排除标签 + (re.compile(r'<think>.*?</think>', re.DOTALL), ''), ] @staticmethod - def clean_markdown(text: str) -> str: + def _clean_markdown(text: str) -> str: """ - 主入口方法:依序执行所有正则,移除或替换 Markdown 元素 + 依序执行所有正则,移除或替换 Markdown 元素 """ - for regex, replacement in MarkdownCleaner.REGEXES: + for regex, replacement in TextFormater.REGEXES: text = regex.sub(replacement, text) - return text.strip() \ No newline at end of file + return text.strip() + + @staticmethod + def format_text(text: str) -> str | None: + """ + 格式化文本,清理markdown标记 + :param text: 待格式化的文本 + :return: 格式化后的文本,如果文本为空或包含关键字,则返回None, 上层逻辑需要处理None的情况 + """ + if not text or text in TextFormater.EXCLUDED_KEYWORDS(): + return None + return TextFormater._clean_markdown(text) From fdbb7517b34ed9186d1d95a2736aee2b51d192a0 Mon Sep 17 00:00:00 2001 From: caixypromise Date: Wed, 7 May 2025 23:28:44 +0800 Subject: 
[PATCH 2/2] =?UTF-8?q?update:=20=E5=AF=B9=E5=A4=A7=E8=AF=AD?= =?UTF-8?q?=E8=A8=80=E6=A8=A1=E5=9E=8B=E5=AF=B9=E8=AF=9D=E5=9B=9E=E5=A4=8D?= =?UTF-8?q?=E5=8E=BB=E6=8E=89think=E3=80=82=20chore:=20=E4=BF=AE=E6=94=B9g?= =?UTF-8?q?emini=E5=88=A4=E6=96=ADproxy=E5=AD=97=E7=AC=A6=E4=B8=B2?= =?UTF-8?q?=E8=AF=AD=E6=B3=95=20issue(#1050)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/xiaozhi-server/core/providers/llm/gemini/gemini.py | 3 +-- main/xiaozhi-server/core/providers/tts/base.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/main/xiaozhi-server/core/providers/llm/gemini/gemini.py b/main/xiaozhi-server/core/providers/llm/gemini/gemini.py index a91a6bf00..e4794b682 100644 --- a/main/xiaozhi-server/core/providers/llm/gemini/gemini.py +++ b/main/xiaozhi-server/core/providers/llm/gemini/gemini.py @@ -25,8 +25,7 @@ def __init__(self, config): # 初始化Gemini客户端 # 配置代理(如果提供了代理配置) self.proxies = None - if self.http_proxy is not "" or self.https_proxy is not "": - + if self.http_proxy or self.https_proxy: self.proxies = { "http": self.http_proxy, "https": self.https_proxy, diff --git a/main/xiaozhi-server/core/providers/tts/base.py b/main/xiaozhi-server/core/providers/tts/base.py index c2d4e85cb..f1a8ee0f5 100644 --- a/main/xiaozhi-server/core/providers/tts/base.py +++ b/main/xiaozhi-server/core/providers/tts/base.py @@ -2,7 +2,6 @@ from config.logger import setup_logging import os from abc import ABC, abstractmethod -from core.utils.tts import MarkdownCleaner from core.utils.util import audio_to_opus_data from core.utils.tts import TextFormater