From 3c220990e4fed37af04835ac2559161baed9f42f Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Sat, 26 Jul 2025 10:07:58 +0000 Subject: [PATCH 01/27] fix: Dockerfile to reduce vulnerabilities The following vulnerabilities are fixed with an upgrade: - https://snyk.io/vuln/SNYK-DEBIAN12-GNUTLS28-10690985 - https://snyk.io/vuln/SNYK-DEBIAN12-GNUTLS28-10690987 - https://snyk.io/vuln/SNYK-DEBIAN12-GNUTLS28-10690990 - https://snyk.io/vuln/SNYK-DEBIAN12-GNUTLS28-10690993 - https://snyk.io/vuln/SNYK-DEBIAN12-ZLIB-6008963 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ef979a37c..6e6d24419 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # 使用官方的 Python 基础镜像 -FROM python:3.10-slim AS base +FROM python:3.14.0rc1-slim AS base # 设置工作目录 WORKDIR /app From fc9256dff5a87cd1b218f6d63229099ba250b1cb Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:11:47 +0800 Subject: [PATCH 02/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E6=B8=A9=E5=BA=A6=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base.py | 37 +++++++++++++++++++++++++++++ deepseek.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++ ollama_client.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++ openai.py | 41 ++++++++++++++++++++++++++++++++ qwen.py | 43 ++++++++++++++++++++++++++++++++++ zhipuai.py | 40 +++++++++++++++++++++++++++++++ 6 files changed, 282 insertions(+) create mode 100644 base.py create mode 100644 deepseek.py create mode 100644 ollama_client.py create mode 100644 openai.py create mode 100644 qwen.py create mode 100644 zhipuai.py diff --git a/base.py b/base.py new file mode 100644 index 000000000..e2978a412 --- /dev/null +++ b/base.py @@ -0,0 +1,37 @@ +from abc import abstractmethod +from typing import List, Dict, Optional +import os + +from biz.llm.types import NotGiven, NOT_GIVEN +from 
biz.utils.log import logger + + +class BaseClient: + """ Base class for chat models client. """ + + def __init__(self): + # 从环境变量获取默认温度设置 + self.default_temperature = float(os.getenv("LLM_TEMPERATURE", "0.3")) + + def ping(self) -> bool: + """Ping the model to check connectivity.""" + try: + result = self.completions(messages=[{"role": "user", "content": '请仅返回 "ok"。'}]) + return result and result == 'ok' + except Exception: + logger.error("尝试连接LLM失败, {e}") + return False + + @abstractmethod + def completions(self, + messages: List[Dict[str, str]], + model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + ) -> str: + """Chat with the model. + + Args: + messages: List of message dictionaries with 'role' and 'content' + model: Model name to use + temperature: Controls randomness in the response (0.0 to 2.0) + """ diff --git a/deepseek.py b/deepseek.py new file mode 100644 index 000000000..43efd7d4e --- /dev/null +++ b/deepseek.py @@ -0,0 +1,60 @@ +import os +from typing import Dict, List, Optional + +from openai import OpenAI + +from biz.llm.client.base import BaseClient +from biz.llm.types import NotGiven, NOT_GIVEN +from biz.utils.log import logger + + +class DeepSeekClient(BaseClient): + def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 + self.api_key = api_key or os.getenv("DEEPSEEK_API_KEY") + self.base_url = os.getenv("DEEPSEEK_API_BASE_URL", "https://api.deepseek.com") + if not self.api_key: + raise ValueError("API key is required. 
Please provide it or set it in the environment variables.") + + self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) # DeepSeek supports OpenAI API SDK + self.default_model = os.getenv("DEEPSEEK_API_MODEL", "deepseek-chat") + + def completions(self, + messages: List[Dict[str, str]], + model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + ) -> str: + try: + model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + + logger.debug(f"Sending request to DeepSeek API. Model: {model}, Temperature: {temperature}, Messages: {messages}") + + completion = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature + ) + + if not completion or not completion.choices: + logger.error("Empty response from DeepSeek API") + return "AI服务返回为空,请稍后重试" + + return completion.choices[0].message.content + + except Exception as e: + logger.error(f"DeepSeek API error: {str(e)}") + # 检查是否是认证错误 + if "401" in str(e): + return "DeepSeek API认证失败,请检查API密钥是否正确" + elif "404" in str(e): + return "DeepSeek API接口未找到,请检查API地址是否正确" + else: + return f"调用DeepSeek API时出错: {str(e)}" diff --git a/ollama_client.py b/ollama_client.py new file mode 100644 index 000000000..a70374dd3 --- /dev/null +++ b/ollama_client.py @@ -0,0 +1,61 @@ +import os +import re +from typing import Dict, List, Optional + +from ollama import ChatResponse +from ollama import Client + +from biz.llm.client.base import BaseClient +from biz.llm.types import NotGiven, NOT_GIVEN + + +class OllamaClient(BaseClient): + def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 + self.default_model = self.default_model = os.getenv("OLLAMA_API_MODEL", "deepseek-r1-8k:14b") + self.base_url = 
os.getenv("OLLAMA_API_BASE_URL", "http://127.0.0.1:11434") + self.client = Client( + host=self.base_url, + ) + + def _extract_content(self, content: str) -> str: + """ + 从内容中提取...标签之外的部分。 + + Args: + content (str): 原始内容。 + + Returns: + str: 提取后的内容。 + """ + if "" in content and "" not in content: + # 大模型回复的时候,思考链有可能截断,那么果断忽略回复,返回空 + return "COT ABORT!" + elif "" not in content and "" in content: + return content.split("", 1)[1].strip() + elif re.search(r'.*?', content, re.DOTALL): + return re.sub(r'.*?', '', content, flags=re.DOTALL).strip() + return content + + def completions(self, + messages: List[Dict[str, str]], + model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + ) -> str: + model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + + response: ChatResponse = self.client.chat( + model=model, + messages=messages, + options={"temperature": temperature} + ) + content = response['message']['content'] + return self._extract_content(content) diff --git a/openai.py b/openai.py new file mode 100644 index 000000000..73b4284ce --- /dev/null +++ b/openai.py @@ -0,0 +1,41 @@ +import os +from typing import Dict, List, Optional + +from openai import OpenAI + +from biz.llm.client.base import BaseClient +from biz.llm.types import NotGiven, NOT_GIVEN + + +class OpenAIClient(BaseClient): + def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 + self.api_key = api_key or os.getenv("OPENAI_API_KEY") + self.base_url = os.getenv("OPENAI_API_BASE_URL", "https://api.openai.com") + if not self.api_key: + raise ValueError("API key is required. 
Please provide it or set it in the environment variables.") + + self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) + self.default_model = os.getenv("OPENAI_API_MODEL", "gpt-4o-mini") + + def completions(self, + messages: List[Dict[str, str]], + model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + ) -> str: + model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + + completion = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + ) + return completion.choices[0].message.content diff --git a/qwen.py b/qwen.py new file mode 100644 index 000000000..e662b0e0e --- /dev/null +++ b/qwen.py @@ -0,0 +1,43 @@ +import os +from typing import Dict, List, Optional + +from openai import OpenAI + +from biz.llm.client.base import BaseClient +from biz.llm.types import NotGiven, NOT_GIVEN + + +class QwenClient(BaseClient): + def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 + self.api_key = api_key or os.getenv("QWEN_API_KEY") + self.base_url = os.getenv("QWEN_API_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1") + if not self.api_key: + raise ValueError("API key is required. 
Please provide it or set it in the environment variables.") + + self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) + self.default_model = os.getenv("QWEN_API_MODEL", "qwen-coder-plus") + self.extra_body={"enable_thinking": False} + + def completions(self, + messages: List[Dict[str, str]], + model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + ) -> str: + model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + + completion = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + extra_body=self.extra_body, + ) + return completion.choices[0].message.content diff --git a/zhipuai.py b/zhipuai.py new file mode 100644 index 000000000..ff6680e92 --- /dev/null +++ b/zhipuai.py @@ -0,0 +1,40 @@ +import os +from typing import Dict, List, Optional + +from zhipuai import ZhipuAI + +from biz.llm.client.base import BaseClient +from biz.llm.types import NotGiven, NOT_GIVEN + + +class ZhipuAIClient(BaseClient): + def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 + self.api_key = api_key or os.getenv("ZHIPUAI_API_KEY") + if not self.api_key: + raise ValueError("API key is required. 
Please provide it or set it in the environment variables.") + + self.client = ZhipuAI(api_key=api_key) + self.default_model = os.getenv("ZHIPUAI_API_MODEL", "GLM-4-Flash") + + def completions(self, + messages: List[Dict[str, str]], + model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + ) -> str: + model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + + completion = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + ) + return completion.choices[0].message.content From c9a1e8fb5c35157bf4db8adb828d447f38f2f26e Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:13:16 +0800 Subject: [PATCH 03/27] Delete ollama_client.py --- ollama_client.py | 61 ------------------------------------------------ 1 file changed, 61 deletions(-) delete mode 100644 ollama_client.py diff --git a/ollama_client.py b/ollama_client.py deleted file mode 100644 index a70374dd3..000000000 --- a/ollama_client.py +++ /dev/null @@ -1,61 +0,0 @@ -import os -import re -from typing import Dict, List, Optional - -from ollama import ChatResponse -from ollama import Client - -from biz.llm.client.base import BaseClient -from biz.llm.types import NotGiven, NOT_GIVEN - - -class OllamaClient(BaseClient): - def __init__(self, api_key: str = None): - super().__init__() # 调用父类初始化 - self.default_model = self.default_model = os.getenv("OLLAMA_API_MODEL", "deepseek-r1-8k:14b") - self.base_url = os.getenv("OLLAMA_API_BASE_URL", "http://127.0.0.1:11434") - self.client = Client( - host=self.base_url, - ) - - def _extract_content(self, content: str) -> str: - """ - 从内容中提取...标签之外的部分。 - - Args: - content (str): 原始内容。 - - Returns: - str: 提取后的内容。 
- """ - if "" in content and "" not in content: - # 大模型回复的时候,思考链有可能截断,那么果断忽略回复,返回空 - return "COT ABORT!" - elif "" not in content and "" in content: - return content.split("", 1)[1].strip() - elif re.search(r'.*?', content, re.DOTALL): - return re.sub(r'.*?', '', content, flags=re.DOTALL).strip() - return content - - def completions(self, - messages: List[Dict[str, str]], - model: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - ) -> str: - model = model or self.default_model - temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature - - # 处理None值,使用默认温度 - if temperature is None: - temperature = self.default_temperature - - # 确保温度值在有效范围内 - temperature = max(0.0, min(2.0, temperature)) - - response: ChatResponse = self.client.chat( - model=model, - messages=messages, - options={"temperature": temperature} - ) - content = response['message']['content'] - return self._extract_content(content) From a75aa380a7fa2ade39a703f772f942a76337751b Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:13:30 +0800 Subject: [PATCH 04/27] Delete deepseek.py --- deepseek.py | 60 ----------------------------------------------------- 1 file changed, 60 deletions(-) delete mode 100644 deepseek.py diff --git a/deepseek.py b/deepseek.py deleted file mode 100644 index 43efd7d4e..000000000 --- a/deepseek.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -from typing import Dict, List, Optional - -from openai import OpenAI - -from biz.llm.client.base import BaseClient -from biz.llm.types import NotGiven, NOT_GIVEN -from biz.utils.log import logger - - -class DeepSeekClient(BaseClient): - def __init__(self, api_key: str = None): - super().__init__() # 调用父类初始化 - self.api_key = api_key or os.getenv("DEEPSEEK_API_KEY") - self.base_url = os.getenv("DEEPSEEK_API_BASE_URL", "https://api.deepseek.com") - if not self.api_key: - raise ValueError("API key is required. 
Please provide it or set it in the environment variables.") - - self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) # DeepSeek supports OpenAI API SDK - self.default_model = os.getenv("DEEPSEEK_API_MODEL", "deepseek-chat") - - def completions(self, - messages: List[Dict[str, str]], - model: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - ) -> str: - try: - model = model or self.default_model - temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature - - # 处理None值,使用默认温度 - if temperature is None: - temperature = self.default_temperature - - # 确保温度值在有效范围内 - temperature = max(0.0, min(2.0, temperature)) - - logger.debug(f"Sending request to DeepSeek API. Model: {model}, Temperature: {temperature}, Messages: {messages}") - - completion = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature - ) - - if not completion or not completion.choices: - logger.error("Empty response from DeepSeek API") - return "AI服务返回为空,请稍后重试" - - return completion.choices[0].message.content - - except Exception as e: - logger.error(f"DeepSeek API error: {str(e)}") - # 检查是否是认证错误 - if "401" in str(e): - return "DeepSeek API认证失败,请检查API密钥是否正确" - elif "404" in str(e): - return "DeepSeek API接口未找到,请检查API地址是否正确" - else: - return f"调用DeepSeek API时出错: {str(e)}" From e366ce3695b9fd306fda28cfb8944954e8bc7c9c Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:13:39 +0800 Subject: [PATCH 05/27] Delete base.py --- base.py | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 base.py diff --git a/base.py b/base.py deleted file mode 100644 index e2978a412..000000000 --- a/base.py +++ /dev/null @@ -1,37 +0,0 @@ -from abc import abstractmethod -from typing import List, Dict, Optional -import os - -from biz.llm.types import NotGiven, NOT_GIVEN -from biz.utils.log import logger - - 
-class BaseClient: - """ Base class for chat models client. """ - - def __init__(self): - # 从环境变量获取默认温度设置 - self.default_temperature = float(os.getenv("LLM_TEMPERATURE", "0.3")) - - def ping(self) -> bool: - """Ping the model to check connectivity.""" - try: - result = self.completions(messages=[{"role": "user", "content": '请仅返回 "ok"。'}]) - return result and result == 'ok' - except Exception: - logger.error("尝试连接LLM失败, {e}") - return False - - @abstractmethod - def completions(self, - messages: List[Dict[str, str]], - model: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - ) -> str: - """Chat with the model. - - Args: - messages: List of message dictionaries with 'role' and 'content' - model: Model name to use - temperature: Controls randomness in the response (0.0 to 2.0) - """ From b7445db24d2fa5b4a5b4c80b3c90645dd20b79df Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:13:49 +0800 Subject: [PATCH 06/27] Delete openai.py --- openai.py | 41 ----------------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 openai.py diff --git a/openai.py b/openai.py deleted file mode 100644 index 73b4284ce..000000000 --- a/openai.py +++ /dev/null @@ -1,41 +0,0 @@ -import os -from typing import Dict, List, Optional - -from openai import OpenAI - -from biz.llm.client.base import BaseClient -from biz.llm.types import NotGiven, NOT_GIVEN - - -class OpenAIClient(BaseClient): - def __init__(self, api_key: str = None): - super().__init__() # 调用父类初始化 - self.api_key = api_key or os.getenv("OPENAI_API_KEY") - self.base_url = os.getenv("OPENAI_API_BASE_URL", "https://api.openai.com") - if not self.api_key: - raise ValueError("API key is required. 
Please provide it or set it in the environment variables.") - - self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) - self.default_model = os.getenv("OPENAI_API_MODEL", "gpt-4o-mini") - - def completions(self, - messages: List[Dict[str, str]], - model: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - ) -> str: - model = model or self.default_model - temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature - - # 处理None值,使用默认温度 - if temperature is None: - temperature = self.default_temperature - - # 确保温度值在有效范围内 - temperature = max(0.0, min(2.0, temperature)) - - completion = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - ) - return completion.choices[0].message.content From d70f4fd4f83a09959298d6895b3086477e92e8ee Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:13:58 +0800 Subject: [PATCH 07/27] Delete qwen.py --- qwen.py | 43 ------------------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 qwen.py diff --git a/qwen.py b/qwen.py deleted file mode 100644 index e662b0e0e..000000000 --- a/qwen.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -from typing import Dict, List, Optional - -from openai import OpenAI - -from biz.llm.client.base import BaseClient -from biz.llm.types import NotGiven, NOT_GIVEN - - -class QwenClient(BaseClient): - def __init__(self, api_key: str = None): - super().__init__() # 调用父类初始化 - self.api_key = api_key or os.getenv("QWEN_API_KEY") - self.base_url = os.getenv("QWEN_API_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1") - if not self.api_key: - raise ValueError("API key is required. 
Please provide it or set it in the environment variables.") - - self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) - self.default_model = os.getenv("QWEN_API_MODEL", "qwen-coder-plus") - self.extra_body={"enable_thinking": False} - - def completions(self, - messages: List[Dict[str, str]], - model: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - ) -> str: - model = model or self.default_model - temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature - - # 处理None值,使用默认温度 - if temperature is None: - temperature = self.default_temperature - - # 确保温度值在有效范围内 - temperature = max(0.0, min(2.0, temperature)) - - completion = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - extra_body=self.extra_body, - ) - return completion.choices[0].message.content From 3bb4b8d98b93a21ea836070748d1a04d0fc1d3d6 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:14:07 +0800 Subject: [PATCH 08/27] Delete zhipuai.py --- zhipuai.py | 40 ---------------------------------------- 1 file changed, 40 deletions(-) delete mode 100644 zhipuai.py diff --git a/zhipuai.py b/zhipuai.py deleted file mode 100644 index ff6680e92..000000000 --- a/zhipuai.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -from typing import Dict, List, Optional - -from zhipuai import ZhipuAI - -from biz.llm.client.base import BaseClient -from biz.llm.types import NotGiven, NOT_GIVEN - - -class ZhipuAIClient(BaseClient): - def __init__(self, api_key: str = None): - super().__init__() # 调用父类初始化 - self.api_key = api_key or os.getenv("ZHIPUAI_API_KEY") - if not self.api_key: - raise ValueError("API key is required. 
Please provide it or set it in the environment variables.") - - self.client = ZhipuAI(api_key=api_key) - self.default_model = os.getenv("ZHIPUAI_API_MODEL", "GLM-4-Flash") - - def completions(self, - messages: List[Dict[str, str]], - model: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - ) -> str: - model = model or self.default_model - temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature - - # 处理None值,使用默认温度 - if temperature is None: - temperature = self.default_temperature - - # 确保温度值在有效范围内 - temperature = max(0.0, min(2.0, temperature)) - - completion = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - ) - return completion.choices[0].message.content From 7921075ac5d53e7031f4059964d8524d5d2f7775 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:16:10 +0800 Subject: [PATCH 09/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E6=B8=A9=E5=BA=A6=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- biz/llm/client/base.py | 11 +++++++++++ biz/llm/client/deepseek.py | 16 ++++++++++++++-- biz/llm/client/ollama_client.py | 18 +++++++++++++++++- biz/llm/client/openai.py | 12 ++++++++++++ biz/llm/client/qwen.py | 12 ++++++++++++ biz/llm/client/zhipuai.py | 12 ++++++++++++ 6 files changed, 78 insertions(+), 3 deletions(-) diff --git a/biz/llm/client/base.py b/biz/llm/client/base.py index b83c36e92..e2978a412 100644 --- a/biz/llm/client/base.py +++ b/biz/llm/client/base.py @@ -1,5 +1,6 @@ from abc import abstractmethod from typing import List, Dict, Optional +import os from biz.llm.types import NotGiven, NOT_GIVEN from biz.utils.log import logger @@ -8,6 +9,10 @@ class BaseClient: """ Base class for chat models client. 
""" + def __init__(self): + # 从环境变量获取默认温度设置 + self.default_temperature = float(os.getenv("LLM_TEMPERATURE", "0.3")) + def ping(self) -> bool: """Ping the model to check connectivity.""" try: @@ -21,6 +26,12 @@ def ping(self) -> bool: def completions(self, messages: List[Dict[str, str]], model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, ) -> str: """Chat with the model. + + Args: + messages: List of message dictionaries with 'role' and 'content' + model: Model name to use + temperature: Controls randomness in the response (0.0 to 2.0) """ diff --git a/biz/llm/client/deepseek.py b/biz/llm/client/deepseek.py index 9cd63b5d7..43efd7d4e 100644 --- a/biz/llm/client/deepseek.py +++ b/biz/llm/client/deepseek.py @@ -10,6 +10,7 @@ class DeepSeekClient(BaseClient): def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 self.api_key = api_key or os.getenv("DEEPSEEK_API_KEY") self.base_url = os.getenv("DEEPSEEK_API_BASE_URL", "https://api.deepseek.com") if not self.api_key: @@ -21,14 +22,25 @@ def __init__(self, api_key: str = None): def completions(self, messages: List[Dict[str, str]], model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, ) -> str: try: model = model or self.default_model - logger.debug(f"Sending request to DeepSeek API. Model: {model}, Messages: {messages}") + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + + logger.debug(f"Sending request to DeepSeek API. 
Model: {model}, Temperature: {temperature}, Messages: {messages}") completion = self.client.chat.completions.create( model=model, - messages=messages + messages=messages, + temperature=temperature ) if not completion or not completion.choices: diff --git a/biz/llm/client/ollama_client.py b/biz/llm/client/ollama_client.py index 1574f9ac5..a70374dd3 100644 --- a/biz/llm/client/ollama_client.py +++ b/biz/llm/client/ollama_client.py @@ -11,6 +11,7 @@ class OllamaClient(BaseClient): def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 self.default_model = self.default_model = os.getenv("OLLAMA_API_MODEL", "deepseek-r1-8k:14b") self.base_url = os.getenv("OLLAMA_API_BASE_URL", "http://127.0.0.1:11434") self.client = Client( @@ -39,7 +40,22 @@ def _extract_content(self, content: str) -> str: def completions(self, messages: List[Dict[str, str]], model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, ) -> str: - response: ChatResponse = self.client.chat(model or self.default_model, messages) + model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + + response: ChatResponse = self.client.chat( + model=model, + messages=messages, + options={"temperature": temperature} + ) content = response['message']['content'] return self._extract_content(content) diff --git a/biz/llm/client/openai.py b/biz/llm/client/openai.py index 69d35f172..73b4284ce 100644 --- a/biz/llm/client/openai.py +++ b/biz/llm/client/openai.py @@ -9,6 +9,7 @@ class OpenAIClient(BaseClient): def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 self.api_key = api_key or os.getenv("OPENAI_API_KEY") self.base_url = os.getenv("OPENAI_API_BASE_URL", "https://api.openai.com") if not self.api_key: @@ -20,10 +21,21 @@ 
def __init__(self, api_key: str = None): def completions(self, messages: List[Dict[str, str]], model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, ) -> str: model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + completion = self.client.chat.completions.create( model=model, messages=messages, + temperature=temperature, ) return completion.choices[0].message.content diff --git a/biz/llm/client/qwen.py b/biz/llm/client/qwen.py index 14e03a9dd..e662b0e0e 100644 --- a/biz/llm/client/qwen.py +++ b/biz/llm/client/qwen.py @@ -9,6 +9,7 @@ class QwenClient(BaseClient): def __init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 self.api_key = api_key or os.getenv("QWEN_API_KEY") self.base_url = os.getenv("QWEN_API_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1") if not self.api_key: @@ -21,11 +22,22 @@ def __init__(self, api_key: str = None): def completions(self, messages: List[Dict[str, str]], model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, ) -> str: model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + completion = self.client.chat.completions.create( model=model, messages=messages, + temperature=temperature, extra_body=self.extra_body, ) return completion.choices[0].message.content diff --git a/biz/llm/client/zhipuai.py b/biz/llm/client/zhipuai.py index 0790cd97f..ff6680e92 100644 --- a/biz/llm/client/zhipuai.py +++ b/biz/llm/client/zhipuai.py @@ -9,6 +9,7 @@ class ZhipuAIClient(BaseClient): def 
__init__(self, api_key: str = None): + super().__init__() # 调用父类初始化 self.api_key = api_key or os.getenv("ZHIPUAI_API_KEY") if not self.api_key: raise ValueError("API key is required. Please provide it or set it in the environment variables.") @@ -19,10 +20,21 @@ def __init__(self, api_key: str = None): def completions(self, messages: List[Dict[str, str]], model: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, ) -> str: model = model or self.default_model + temperature = temperature if temperature is not NOT_GIVEN else self.default_temperature + + # 处理None值,使用默认温度 + if temperature is None: + temperature = self.default_temperature + + # 确保温度值在有效范围内 + temperature = max(0.0, min(2.0, temperature)) + completion = self.client.chat.completions.create( model=model, messages=messages, + temperature=temperature, ) return completion.choices[0].message.content From 21b5a60c588acab8bc13bfa2bc16d7686ffa6041 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:20:03 +0800 Subject: [PATCH 10/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0RAG=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E5=AE=A1=E6=9F=A5=E5=99=A8=E3=80=81=E7=9F=A5=E8=AF=86=E5=BA=93?= =?UTF-8?q?=E7=AE=A1=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- biz/utils/knowledge_base.py | 674 +++++++++++++++++++++++++++++++++ biz/utils/rag_code_reviewer.py | 196 ++++++++++ 2 files changed, 870 insertions(+) create mode 100644 biz/utils/knowledge_base.py create mode 100644 biz/utils/rag_code_reviewer.py diff --git a/biz/utils/knowledge_base.py b/biz/utils/knowledge_base.py new file mode 100644 index 000000000..4cde3ca3b --- /dev/null +++ b/biz/utils/knowledge_base.py @@ -0,0 +1,674 @@ +import os +import json +import uuid +from typing import List, Dict, Any, Optional +from pathlib import Path +import hashlib +import chromadb +from chromadb.config import Settings +from sentence_transformers import 
class DocumentProcessor:
    """Extract plain text from the document formats the knowledge base accepts.

    Every ``extract_text_from_*`` helper is best-effort: any exception is
    logged and an empty string is returned, so callers must treat ``""`` as
    "nothing usable was extracted".
    """

    @staticmethod
    def extract_text_from_pdf(file_path: str) -> str:
        """Return the concatenated text of all pages of the PDF at *file_path*."""
        try:
            with open(file_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                # `or ""` keeps a page that yields no text from raising and
                # thereby discarding the text of every other page.
                pages = [page.extract_text() or "" for page in reader.pages]
            return "\n".join(pages).strip()
        except Exception as e:
            logger.error(f"PDF文件处理失败: {e}")
            return ""

    @staticmethod
    def extract_text_from_docx(file_path: str) -> str:
        """Return the paragraph text of the Word document at *file_path*."""
        try:
            doc = Document(file_path)
            return "\n".join(paragraph.text for paragraph in doc.paragraphs).strip()
        except Exception as e:
            logger.error(f"Word文档处理失败: {e}")
            return ""

    @staticmethod
    def extract_text_from_md(file_path: str) -> str:
        """Return the Markdown file's text with all markup removed."""
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                md_content = file.read()
            # Render to HTML first, then strip the tags to get plain text.
            html = markdown.markdown(md_content)
            soup = BeautifulSoup(html, 'html.parser')
            return soup.get_text().strip()
        except Exception as e:
            logger.error(f"Markdown文件处理失败: {e}")
            return ""

    @staticmethod
    def extract_text_from_txt(file_path: str) -> str:
        """Return the stripped UTF-8 content of a plain-text or source file."""
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read().strip()
        except Exception as e:
            logger.error(f"文本文件处理失败: {e}")
            return ""

    @classmethod
    def process_document(cls, file_path: str) -> str:
        """Dispatch on the (case-insensitive) file extension and extract text.

        Returns an empty string, after logging a warning, for unsupported
        extensions.
        """
        ext = Path(file_path).suffix.lower()

        if ext == '.pdf':
            return cls.extract_text_from_pdf(file_path)
        if ext == '.docx':
            return cls.extract_text_from_docx(file_path)
        if ext == '.md':
            return cls.extract_text_from_md(file_path)
        if ext in ('.txt', '.py', '.js', '.java', '.cpp', '.c', '.go'):
            return cls.extract_text_from_txt(file_path)

        logger.warning(f"不支持的文件类型: {ext}")
        return ""
class TextSplitter:
    """Split long text into overlapping chunks, preferring sentence boundaries."""

    # Characters after which a chunk may end (ASCII and full-width punctuation).
    _SENTENCE_ENDS = frozenset('.?!\n。?!')
    # How far back from the nominal chunk end a sentence boundary is searched.
    _BOUNDARY_LOOKBACK = 200

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def split_text(self, text: str) -> List[str]:
        """Return *text* cut into chunks of at most ``chunk_size`` characters.

        Text that already fits in one chunk is returned unchanged as a
        single-element list. Otherwise consecutive chunks overlap by
        ``chunk_overlap`` characters, each chunk is stripped, and chunks that
        are empty after stripping are dropped.
        """
        if len(text) <= self.chunk_size:
            return [text]

        text_len = len(text)
        chunks = []
        start = 0

        while start < text_len:
            end = start + self.chunk_size

            if end < text_len:
                # Prefer to cut right after the closest sentence end that lies
                # within the lookback window before the nominal end.
                lower = max(end - self._BOUNDARY_LOOKBACK, start)
                for i in range(end, lower, -1):
                    if text[i] in self._SENTENCE_ENDS:
                        end = i + 1
                        break

            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)

            # The chunk just emitted reaches the end of the text; continuing
            # would only emit a suffix that is already contained in it.
            if end >= text_len:
                break

            # Always move forward, even when the overlap is as large as the
            # chunk (or the boundary search shortened the chunk that much).
            start = max(end - self.chunk_overlap, start + 1)

        return chunks
def _get_or_create_collection(self, name: str, metadata: dict = None):
    """Return the collection called *name*, creating it if it does not exist.

    Args:
        name: Collection name.
        metadata: Metadata passed to ``create_collection`` when the
            collection has to be created; it is not applied to a
            collection that already exists.
    """
    try:
        return self.client.get_collection(name)
    except Exception:
        # Any lookup failure is treated as "missing". Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return self.client.create_collection(
            name,
            metadata=metadata
        )
logger.warning(f"文档文件不存在: {file_path}") + continue + + # 读取文档内容 + content = self.doc_processor.process_document(file_path) + if not content.strip(): + logger.warning(f"文档 {title} 内容为空") + continue + + # 添加到知识库 + self.add_builtin_document(title, content, tags) + loaded_count += 1 + logger.info(f"✅ 成功加载内置文档: {title}") + + except Exception as e: + logger.error(f"❌ 加载内置文档失败 {doc_config.get('title', '未知')}: {e}") + + logger.info(f"内置知识库初始化完成,成功加载 {loaded_count} 个文档") + + def add_custom_document(self, title: str, file_path: str, tags: List[str] = None) -> str: + """添加自定义文档到知识库""" + try: + # 处理文档 + content = self.doc_processor.process_document(file_path) + if not content: + raise ValueError("文档内容为空") + + return self._add_document(self.custom_collection, title, content, tags or [], "custom") + except Exception as e: + logger.error(f"添加自定义文档失败: {e}") + raise + + def add_builtin_document(self, title: str, content: str, tags: List[str] = None) -> str: + """添加内置文档到知识库""" + return self._add_document(self.builtin_collection, title, content, tags or [], "builtin") + + def _add_document(self, collection, title: str, content: str, tags: List[str], source: str) -> str: + """内部方法:添加文档到指定集合""" + # 分割文本 + chunks = self.text_splitter.split_text(content) + + # 生成文档ID + doc_id = hashlib.md5(f"{title}_{content[:100]}".encode()).hexdigest()[:8] + + # 准备数据 + chunk_ids = [] + chunk_texts = [] + chunk_metadatas = [] + + for i, chunk in enumerate(chunks): + chunk_id = f"{doc_id}_chunk_{i}" + chunk_ids.append(chunk_id) + chunk_texts.append(chunk) + chunk_metadatas.append({ + "doc_id": doc_id, + "title": title, + "chunk_index": i, + "tags": ",".join(tags), + "source": source + }) + + # 向量化并存储 + embeddings = self.model.encode(chunk_texts).tolist() + + collection.add( + ids=chunk_ids, + documents=chunk_texts, + metadatas=chunk_metadatas, + embeddings=embeddings + ) + + logger.info(f"文档已添加: {title}, 分割为 {len(chunks)} 个块") + return doc_id + + def search_relevant_documents(self, query: str, n_results: 
def search_relevant_documents_with_full_docs(self, query: str, n_results: int = 5, source: str = "all", similarity_threshold: float = 0.2) -> List[Dict[str, Any]]:
    """Search for relevant chunks and expand each hit to its full document.

    Args:
        query: Search query.
        n_results: Maximum number of results to return.
        source: Which collections to search: "all", "custom" or "builtin".
        similarity_threshold: Similarity in [0, 1]; a chunk scoring at least
            this value causes its whole document to be returned.

    Returns:
        Results sorted by descending score. A document is returned at most
        once, carrying the score of its best-matching chunk and
        ``metadata["is_full_document"] = True``. Hits whose document could
        not be reassembled are returned as the original chunk result.
    """
    # Over-fetch chunks: several hits may collapse into one document below.
    chunk_results = self.search_relevant_documents(query, n_results * 3, source, similarity_threshold)

    doc_ids_to_fetch = {
        result['metadata']['doc_id']
        for result in chunk_results
        if result['score'] >= similarity_threshold
    }

    collections_to_search = []
    if source in ["all", "custom"]:
        collections_to_search.append(("custom", self.custom_collection))
    if source in ["all", "builtin"]:
        collections_to_search.append(("builtin", self.builtin_collection))

    # Reassemble the full text of every document that had a qualifying hit.
    full_docs = {}
    if doc_ids_to_fetch:
        for source_name, collection in collections_to_search:
            try:
                all_data = collection.get(include=["documents", "metadatas"])

                # Group this collection's chunks by document.
                doc_chunks = {}
                for text, metadata in zip(all_data['documents'], all_data['metadatas']):
                    doc_id = metadata['doc_id']
                    if doc_id not in doc_ids_to_fetch:
                        continue
                    entry = doc_chunks.setdefault(doc_id, {
                        'title': metadata['title'],
                        'chunks': [],
                        'source': source_name,
                        'tags': metadata['tags']
                    })
                    entry['chunks'].append({
                        'content': text,
                        'chunk_index': metadata['chunk_index']
                    })

                # Join each document's chunks in their original order.
                for doc_id, doc_info in doc_chunks.items():
                    doc_info['chunks'].sort(key=lambda x: x['chunk_index'])
                    full_docs[doc_id] = {
                        'title': doc_info['title'],
                        'content': '\n\n'.join(chunk['content'] for chunk in doc_info['chunks']),
                        'source': doc_info['source'],
                        'tags': doc_info['tags'],
                        'chunk_count': len(doc_info['chunks'])
                    }

            except Exception as e:
                logger.error(f"获取 {source_name} 完整文档失败: {e}")

    results = []
    emitted_doc_ids = set()
    for result in chunk_results:
        doc_id = result['metadata']['doc_id']
        if doc_id in emitted_doc_ids:
            # The full document is already in the list (with the score of its
            # best chunk); further chunks of it would only be duplicates.
            continue
        if doc_id in full_docs:
            doc = full_docs[doc_id]
            results.append({
                "content": doc['content'],
                "metadata": {
                    "doc_id": doc_id,
                    "title": doc['title'],
                    "tags": doc['tags'],
                    "source": doc['source'],
                    "chunk_count": doc['chunk_count'],
                    "is_full_document": True
                },
                "score": result['score'],
                "source": doc['source']
            })
            emitted_doc_ids.add(doc_id)
        else:
            # Document could not be reassembled; keep the raw chunk.
            results.append(result)

    results.sort(key=lambda x: x['score'], reverse=True)
    return results[:n_results]
(r"\bgo\s+", 1), # goroutine + ], + "libraries": ["gin", "gorm", "echo"] + }, + "cpp": { + "keywords": [ + (r"#include\s+[<\"][\w\.]+[>\"]", 3), # include语句 + (r"\bclass\s+\w+", 3), # 类定义 + (r"\btemplate\s*<", 2), # 模板 + (r"::\s*", 1), # 作用域解析 + ], + "libraries": ["boost", "qt", "opencv"] + }, + "html": { + "keywords": [ + (r"<\w+[^>]*>", 2), # HTML标签 + (r"", 1), # 结束标签 + (r"\bclass\s*=\s*[\"']", 1), # class属性 + ], + "libraries": [] + }, + "css": { + "keywords": [ + (r"{\s*[\w\-]+\s*:", 2), # 规则块 + (r"@media\b", 2), # 媒体查询 + (r"#[\w\-]+\s*{", 1), # ID选择器 + ], + "libraries": [] + } + } + + # 检测代码语言特征 + language_scores = {} + for lang, patterns in language_patterns.items(): + score = 0 + # 检查关键字模式 + for pattern, weight in patterns["keywords"]: + matches = len(re.findall(pattern, code_content)) + score += matches * weight + + # 检查常用库 + for lib in patterns["libraries"]: + if lib in code_content.lower(): + score += 2 + + if score > 0: + language_scores[lang] = score + logger.info(f"\n--------{lang} score: {score}--------") + + # 确定主要语言 - 选择得分最高的语言 + primary_language = None + if language_scores: + primary_language = max(language_scores.items(), key=lambda x: x[1])[0] + + logger.info(f"\n--------Primary language: {primary_language}--------") + + # 如果没有检测到任何语言特征,返回空列表 + if not primary_language: + logger.info("No language features detected") + return [] + + # 构建多个搜索查询 + search_queries = [ + f"{primary_language} standards coding best practices", # 基础查询 + f"{primary_language} common pitfalls and solutions", # 常见问题 + f"{primary_language} security guidelines", # 安全指南 + f"{primary_language} performance optimization" # 性能优化 + ] + + # 合并多个查询的结果,使用新的完整文档检索方法 + all_results = [] + for query in search_queries: + results = self.search_relevant_documents_with_full_docs(query, n_results=2, source='all', similarity_threshold=similarity_threshold) + all_results.extend(results) + + # 去重并保留相似度最高的结果 + unique_results = {} + for result in all_results: + doc_id = result['metadata']['doc_id'] + 
def delete_document(self, doc_id: str, source: str = "custom"):
    """Delete every chunk of the document *doc_id*.

    Args:
        doc_id: Document identifier stored in each chunk's ``doc_id`` metadata.
        source: "custom" targets the custom collection; any other value
            targets the built-in collection.

    Raises:
        Exception: Any error from the underlying collection is logged and
            re-raised. A document that does not exist only logs a warning.
    """
    collection = self.custom_collection if source == "custom" else self.builtin_collection

    try:
        # Let the store filter by doc_id instead of loading the text and
        # metadata of every chunk in the collection; only the ids are needed.
        matching = collection.get(where={"doc_id": doc_id}, include=["metadatas"])
        chunk_ids_to_delete = list(matching['ids'])

        if chunk_ids_to_delete:
            collection.delete(ids=chunk_ids_to_delete)
            logger.info(f"已删除文档 {doc_id},共 {len(chunk_ids_to_delete)} 个块")
        else:
            logger.warning(f"未找到文档 {doc_id}")
    except Exception as e:
        logger.error(f"删除文档失败: {e}")
        raise
def _load_prompts(self, prompt_key: str, style="professional") -> Dict[str, Any]:
    """Load and render the system/user prompt templates for *prompt_key*.

    Templates are read from ``conf/prompt_templates.yml`` and rendered with
    Jinja2, passing ``style``. If *prompt_key* is not configured, the
    ``code_review_prompt`` entry is used instead. If the file is missing,
    empty, malformed, or lacks the expected keys, the built-in default
    prompts from ``_get_default_prompts`` are returned.
    """
    prompt_templates_file = "conf/prompt_templates.yml"
    try:
        with open(prompt_templates_file, "r", encoding="utf-8") as file:
            # safe_load returns None for an empty file; treat it as "no
            # templates" so the lookups below fall through to the defaults.
            prompts_config = yaml.safe_load(file) or {}

        # Fall back to the plain code-review prompts when no RAG-specific
        # entry is configured.
        if prompt_key not in prompts_config:
            prompt_key = "code_review_prompt"

        prompts = prompts_config.get(prompt_key, {})

        def render_template(template_str: str) -> str:
            return Template(template_str).render(style=style)

        system_prompt = render_template(prompts["system_prompt"])
        user_prompt = render_template(prompts["user_prompt"])

        return {
            "system_message": {"role": "system", "content": system_prompt},
            "user_message": {"role": "user", "content": user_prompt},
        }
    except (FileNotFoundError, KeyError, TypeError, AttributeError, yaml.YAMLError) as e:
        # TypeError/AttributeError cover a config whose top level or prompt
        # entry is not a mapping (e.g. a YAML list or a bare string).
        logger.error(f"加载提示词配置失败: {e}")
        return self._get_default_prompts()
def review_code(self, diffs_text: str, commits_text: str = "", relevant_docs: str = "", temperature: Optional[float] = None) -> str:
    """Run a RAG-augmented review and return the model's answer.

    The configured user prompt template is filled with the diff, the commit
    messages and the retrieved knowledge (placeholder text is substituted for
    an empty commit message or empty knowledge), then sent together with the
    system prompt through ``call_llm``.
    """
    template = self.prompts["user_message"]["content"]
    fields = {
        "diffs_text": diffs_text,
        "commits_text": commits_text if commits_text else "无提交信息",
        "relevant_docs": relevant_docs if relevant_docs else "无相关文档",
    }
    user_message = {"role": "user", "content": template.format(**fields)}

    conversation = [self.prompts["system_message"], user_message]
    return self.call_llm(conversation, temperature)
logger.info("内置文档已恢复") + + @staticmethod + def parse_review_score(review_text: str) -> int: + """解析审查评分""" + return CodeReviewer.parse_review_score(review_text) \ No newline at end of file From 720015f332dbbccfc930a16771897c5efc6b5d1b Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:21:12 +0800 Subject: [PATCH 11/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=9F=A5=E8=AF=86?= =?UTF-8?q?=E5=BA=93API=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- biz/api/knowledge_api.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 biz/api/knowledge_api.py diff --git a/biz/api/knowledge_api.py b/biz/api/knowledge_api.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/biz/api/knowledge_api.py @@ -0,0 +1 @@ + From 43727d18cb2c17b73f5e74e11f44eee2949c30a0 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:21:42 +0800 Subject: [PATCH 12/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=9F=A5=E8=AF=86?= =?UTF-8?q?=E5=BA=93API=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- biz/api/knowledge_api.py | 292 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) diff --git a/biz/api/knowledge_api.py b/biz/api/knowledge_api.py index 8b1378917..84918f9ed 100644 --- a/biz/api/knowledge_api.py +++ b/biz/api/knowledge_api.py @@ -1 +1,293 @@ +import os +import traceback +from flask import Blueprint, request, jsonify +from werkzeug.utils import secure_filename +from biz.utils.rag_code_reviewer import RAGCodeReviewer +from biz.utils.code_reviewer import CodeReviewer +from biz.utils.log import logger + +knowledge_bp = Blueprint('knowledge', __name__) + +# 允许的文件扩展名 +ALLOWED_EXTENSIONS = {'txt', 'md'} + +def allowed_file(filename): + return '.' 
@knowledge_bp.route('/upload', methods=['POST'])
def upload_document():
    """Upload a knowledge document and index it into the custom knowledge base.

    Expects a multipart form with a ``file`` part (.txt or .md) and optional
    ``title`` and comma-separated ``tags`` fields. Returns the new document id
    on success, 400 for a missing or unsupported file, and 500 on any
    processing error.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': '没有文件'}), 400

        file = request.files['file']
        if file.filename == '':
            return jsonify({'error': '没有选择文件'}), 400

        if not allowed_file(file.filename):
            return jsonify({'error': '不支持的文件类型。请上传 .txt 或 .md 格式的文档文件。'}), 400

        title = request.form.get('title', file.filename)
        tags = [tag.strip() for tag in request.form.get('tags', '').split(',') if tag.strip()]

        # Store under a random name that keeps only the already-validated
        # extension. A sanitised client filename can lose the extension for
        # non-ASCII names (so the document would be rejected as empty), and
        # two uploads with the same name would overwrite each other.
        extension = file.filename.rsplit('.', 1)[1].lower()
        upload_folder = 'data/uploads'
        os.makedirs(upload_folder, exist_ok=True)
        file_path = os.path.join(upload_folder, f"{os.urandom(8).hex()}.{extension}")
        file.save(file_path)

        try:
            reviewer = RAGCodeReviewer()
            doc_id = reviewer.add_knowledge_document(title, file_path, tags)
        finally:
            # Remove the temporary file whether or not indexing succeeded.
            if os.path.exists(file_path):
                os.remove(file_path)

        return jsonify({
            'message': '文档上传成功',
            'doc_id': doc_id,
            'title': title,
            'tags': tags
        })

    except Exception as e:
        logger.error(f"上传文档失败: {e}")
        logger.error(traceback.format_exc())
        return jsonify({'error': f'上传失败: {str(e)}'}), 500
@knowledge_bp.route('/search', methods=['POST'])
def search_documents():
    """Search the knowledge base for chunks relevant to a query.

    JSON body: ``query`` (required), ``n_results`` (positive integer,
    default 5), ``source`` ("all", "custom" or "builtin", default "all") and
    ``similarity_threshold`` (number in [0, 1], default 0.0). Responds with
    400 for a missing query or invalid parameters and 500 for search errors.
    """
    try:
        # silent=True turns a missing or non-JSON body into None, so the
        # client gets the 400 below instead of a generic 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'query' not in data:
            return jsonify({'error': '缺少查询参数'}), 400

        query = data['query']
        source = data.get('source', 'all')  # all, custom, builtin

        # Coerce numeric parameters up front so a bad value is a client error.
        try:
            n_results = int(data.get('n_results', 5))
            similarity_threshold = float(data.get('similarity_threshold', 0.0))
        except (TypeError, ValueError) as e:
            return jsonify({'error': f'参数错误: {str(e)}'}), 400

        if n_results < 1:
            return jsonify({'error': 'n_results必须是正整数'}), 400

        if not 0 <= similarity_threshold <= 1:
            return jsonify({'error': '相似度阈值必须在0到1之间'}), 400

        reviewer = RAGCodeReviewer()
        results = reviewer.knowledge_base.search_relevant_documents(
            query, n_results, source, similarity_threshold
        )

        # Defensive re-check that every returned result meets the threshold.
        filtered_results = [r for r in results if r['score'] >= similarity_threshold]

        return jsonify({
            'query': query,
            'results': filtered_results,
            'total': len(filtered_results),
            'similarity_threshold': similarity_threshold
        })

    except ValueError as e:
        return jsonify({'error': f'参数错误: {str(e)}'}), 400
    except Exception as e:
        logger.error(f"搜索文档失败: {e}")
        return jsonify({'error': f'搜索失败: {str(e)}'}), 500
methods=['POST']) +def test_rag(): + """测试RAG功能""" + try: + data = request.get_json() + if not data or 'code' not in data: + return jsonify({'error': '缺少代码参数'}), 400 + + code = data['code'] + commit_message = data.get('commit_message', '') + similarity_threshold = float(data.get('similarity_threshold', 0.2)) # 新增相似度阈值参数 + temperature = float(data.get('temperature', 0.3)) # 新增温度参数 + + # 验证相似度阈值范围 + if not 0 <= similarity_threshold <= 1: + return jsonify({'error': '相似度阈值必须在0到1之间'}), 400 + + # 验证温度范围 + if not 0 <= temperature <= 2: + return jsonify({'error': '温度值必须在0到2之间'}), 400 + + reviewer = RAGCodeReviewer() + + # 获取相关知识 + relevant_docs = reviewer.get_relevant_knowledge(code, similarity_threshold) + + # 进行审查 + review_result = reviewer.review_and_strip_code(code, commit_message, similarity_threshold, temperature) + score = reviewer.parse_review_score(review_result) + + return jsonify({ + 'code': code, + 'commit_message': commit_message, + 'similarity_threshold': similarity_threshold, + 'temperature': temperature, + 'relevant_docs': relevant_docs, + 'review_result': review_result, + 'score': score + }) + + except Exception as e: + logger.error(f"RAG测试失败: {e}") + return jsonify({'error': f'测试失败: {str(e)}'}), 500 + +@knowledge_bp.route('/status', methods=['GET']) +def get_status(): + """获取知识库状态""" + try: + reviewer = RAGCodeReviewer() + documents = reviewer.list_knowledge_documents() + + custom_docs = [doc for doc in documents if doc['source'] == 'custom'] + builtin_docs = [doc for doc in documents if doc['source'] == 'builtin'] + + return jsonify({ + 'rag_enabled': reviewer.enable_rag, + 'total_documents': len(documents), + 'custom_documents': len(custom_docs), + 'builtin_documents': len(builtin_docs), + 'knowledge_base_path': reviewer.knowledge_base.db_path + }) + + except Exception as e: + logger.error(f"获取状态失败: {e}") + return jsonify({'error': f'获取状态失败: {str(e)}'}), 500 + +@knowledge_bp.route('/compare_rag', methods=['POST']) +def compare_rag(): + 
"""对比测试RAG和非RAG的代码审查结果""" + try: + data = request.get_json() + if not data or 'code' not in data: + return jsonify({'error': '缺少代码参数'}), 400 + + code = data['code'] + commit_message = data.get('commit_message', '') + similarity_threshold = float(data.get('similarity_threshold', 0.2)) # 新增相似度阈值参数 + temperature = float(data.get('temperature', 0.3)) # 新增温度参数 + + # 验证相似度阈值范围 + if not 0 <= similarity_threshold <= 1: + return jsonify({'error': '相似度阈值必须在0到1之间'}), 400 + + # 验证温度范围 + if not 0 <= temperature <= 2: + return jsonify({'error': '温度值必须在0到2之间'}), 400 + + # 1. 使用RAG进行审查 + rag_reviewer = RAGCodeReviewer() + + # 获取相关知识 + relevant_docs = rag_reviewer.get_relevant_knowledge(code, similarity_threshold) + + # RAG审查 + rag_review_result = rag_reviewer.review_and_strip_code(code, commit_message, similarity_threshold, temperature) + rag_score = rag_reviewer.parse_review_score(rag_review_result) + + # 2. 使用普通模型进行审查(不使用RAG) + normal_reviewer = CodeReviewer() + normal_review_result = normal_reviewer.review_and_strip_code(code, commit_message, temperature) + normal_score = normal_reviewer.parse_review_score(normal_review_result) + + # 计算实际显示的文档数量 + docs = relevant_docs.split('###') if relevant_docs else [] + actual_docs = [doc for doc in docs if doc.strip()] + + return jsonify({ + 'code': code, + 'commit_message': commit_message, + 'similarity_threshold': similarity_threshold, + 'temperature': temperature, + 'rag_result': { + 'relevant_docs': relevant_docs, + 'review_result': rag_review_result, + 'score': rag_score + }, + 'normal_result': { + 'review_result': normal_review_result, + 'score': normal_score + }, + 'comparison': { + 'score_difference': rag_score - normal_score, + 'has_relevant_docs': bool(relevant_docs.strip()), + 'unique_docs_count': len(actual_docs), # 使用实际显示的文档数 + 'chunks_count': len(actual_docs) # 保持一致性 + } + }) + + except Exception as e: + logger.error(f"对比测试失败: {e}") + return jsonify({'error': f'对比测试失败: {str(e)}'}), 500 From 
c95b79ad4c5db2bc9ee1222c5c27d074a3927972 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:22:15 +0800 Subject: [PATCH 13/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0RAG=E7=AE=A1=E7=90=86?= =?UTF-8?q?=E7=95=8C=E9=9D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rag_dashboard.py | 1253 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1253 insertions(+) create mode 100644 rag_dashboard.py diff --git a/rag_dashboard.py b/rag_dashboard.py new file mode 100644 index 000000000..d7b25be0e --- /dev/null +++ b/rag_dashboard.py @@ -0,0 +1,1253 @@ + # -*- coding: utf-8 -*- +import streamlit as st +import requests +import json +import os +from datetime import datetime + +# 导入示例代码 +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from docs.examples.code_examples import ( + html_example as HTML_EXAMPLE, + css_example as CSS_EXAMPLE, + js_example as JS_EXAMPLE, + java_example as JAVA_EXAMPLE, + python_example as PYTHON_EXAMPLE, + cpp_example as CPP_EXAMPLE, + go_example as GO_EXAMPLE +) + +# 设置页面配置 +st.set_page_config( + page_title="RAG代码审查 - 知识库管理", + page_icon="🧠", + layout="wide", + initial_sidebar_state="expanded" +) + +# API基础URL +API_BASE_URL = os.getenv('API_BASE_URL', 'http://localhost:5001') + +def get_knowledge_status(): + """获取知识库状态""" + try: + response = requests.get(f"{API_BASE_URL}/api/knowledge/status") + if response.status_code == 200: + return response.json() + else: + st.error(f"获取状态失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def list_documents(): + """获取文档列表""" + try: + response = requests.get(f"{API_BASE_URL}/api/knowledge/documents") + if response.status_code == 200: + return response.json() + else: + st.error(f"获取文档列表失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def 
search_documents(query, n_results=5, source='all', similarity_threshold=0.0): + """搜索文档""" + try: + data = { + 'query': query, + 'n_results': n_results, + 'source': source, + 'similarity_threshold': similarity_threshold + } + response = requests.post(f"{API_BASE_URL}/api/knowledge/search", json=data) + if response.status_code == 200: + return response.json() + else: + st.error(f"搜索失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def test_rag(code, commit_message='', temperature=0.3, similarity_threshold=0.2): + """测试RAG功能""" + try: + data = { + 'code': code, + 'commit_message': commit_message, + 'temperature': temperature, + 'similarity_threshold': similarity_threshold + } + response = requests.post(f"{API_BASE_URL}/api/knowledge/test_rag", json=data) + if response.status_code == 200: + return response.json() + else: + st.error(f"RAG测试失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def compare_rag(code, commit_message='', temperature=0.3, similarity_threshold=0.2): + """对比测试RAG和非RAG功能""" + try: + data = { + 'code': code, + 'commit_message': commit_message, + 'temperature': temperature, + 'similarity_threshold': similarity_threshold + } + response = requests.post(f"{API_BASE_URL}/api/knowledge/compare_rag", json=data) + if response.status_code == 200: + return response.json() + else: + st.error(f"RAG对比测试失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def upload_document(file_name, content, tags): + """上传文档""" + try: + # 创建临时文件对象 + import io + file_obj = io.BytesIO(content.encode('utf-8')) + file_obj.name = file_name + + files = {'file': (file_name, file_obj, 'text/plain')} + data = { + 'title': file_name, + 'tags': ','.join(tags) if tags else '' + } + response = requests.post(f"{API_BASE_URL}/api/knowledge/upload", files=files, data=data) + if response.status_code == 200: + return response.json() 
+ else: + st.error(f"上传失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def delete_document(doc_id, source='custom'): + """删除文档""" + try: + response = requests.delete(f"{API_BASE_URL}/api/knowledge/documents/{doc_id}?source={source}") + if response.status_code == 200: + return response.json() + else: + st.error(f"删除失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def restore_builtin_documents(): + """恢复内置文档""" + try: + response = requests.post(f"{API_BASE_URL}/api/knowledge/documents/restore") + if response.status_code == 200: + return response.json() + else: + st.error(f"恢复失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def reload_builtin_documents(): + """重新加载内置文档""" + try: + response = requests.post(f"{API_BASE_URL}/api/knowledge/documents/reload") + if response.status_code == 200: + return response.json() + else: + st.error(f"重新加载失败: {response.text}") + return None + except Exception as e: + st.error(f"连接API失败: {e}") + return None + +def generate_markdown_report(result, code, commit_message, report_type="RAG"): + """生成Markdown格式的审查报告""" + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # 获取模型参数 + temperature = result.get('temperature', 'N/A') + similarity_threshold = result.get('similarity_threshold', 'N/A') + + markdown_content = f"""# 代码审查报告 + +## 基本信息 +- **报告类型**: {report_type}代码审查 +- **生成时间**: {timestamp} + +## 模型参数 +- **模型温度**: {temperature} +- **相似度阈值**: {similarity_threshold} + +## 代码信息 +- **提交信息**: {commit_message if commit_message else '无'} +- **代码长度**: {len(code)} 字符 + +## 代码内容 +```code +{code} +``` + +""" + + # 根据报告类型添加审查结果 + if report_type == "RAG": + markdown_content += f""" +## 审查结果 +{result.get('review_result', '无审查结果')} + +""" + + # 如果是RAG审查,添加相关文档信息 + if report_type == "RAG" and result.get('relevant_docs'): + markdown_content += f""" +## 检索到的相关文档 
+{result.get('relevant_docs', '无相关文档')} + +""" + + # 如果是对比报告,添加对比信息 + if report_type == "对比" and 'comparison' in result: + comparison = result['comparison'] + markdown_content += f""" +## 对比分析 +- **评分差异**: {comparison.get('score_difference', 0):+d} +- **检索文档数**: {comparison.get('unique_docs_count', 0)} +- **是否找到相关文档**: {'是' if comparison.get('has_relevant_docs', False) else '否'} + +### RAG增强审查结果 +**评分**: {result.get('rag_result', {}).get('score', 'N/A')}/100 + +{result.get('rag_result', {}).get('review_result', '无审查结果')} + +### 普通模型审查结果 +**评分**: {result.get('normal_result', {}).get('score', 'N/A')}/100 + +{result.get('normal_result', {}).get('review_result', '无审查结果')} + +""" + + markdown_content += f""" +--- +*报告由AI代码审查系统自动生成* +""" + + return markdown_content + +# 主界面 +st.title("🤖 RAG代码审查 - 知识库管理") + +# 侧边栏 +with st.sidebar: + st.header("导航") + page = st.selectbox("选择功能", [ + "📊 状态总览", + "📚 文档管理", + "🔍 文档搜索", + "📤 文档上传", + "🧪 RAG测试", + "📁 批量文件审查" + ]) + +# 状态总览页面 +if page == "📊 状态总览": + st.header("知识库状态") + + status = get_knowledge_status() + if status: + col1, col2, col3, col4 = st.columns(4) + + with col1: + st.metric("RAG状态", "启用" if status['rag_enabled'] else "禁用") + + with col2: + st.metric("总文档数", status['total_documents']) + + with col3: + st.metric("自定义文档", status['custom_documents']) + + with col4: + st.metric("内置文档", status['builtin_documents']) + + +# 文档管理页面 +elif page == "📚 文档管理": + st.header("文档管理") + + # 添加恢复内置文档按钮和批量操作按钮 + col1, col2, col3, col4 = st.columns([3, 3, 2, 4]) + with col1: + if st.button("🔄 恢复内置文档", use_container_width=True, help="恢复所有内置文档"): + with st.spinner("正在恢复内置文档..."): + result = restore_builtin_documents() + if result: + st.success("✅ 内置文档已恢复!") + # 清除缓存的文档列表并标记需要刷新 + if 'documents_cache' in st.session_state: + del st.session_state.documents_cache + st.session_state.refresh_documents = True + st.rerun() + + with col2: + if st.button("🔄 重新加载内置文档", use_container_width=True, help="清除并重新加载最新的内置文档"): + with st.spinner("正在重新加载内置文档..."): 
+ result = reload_builtin_documents() + if result: + st.success("✅ 内置文档已重新加载!") + # 清除缓存的文档列表并标记需要刷新 + if 'documents_cache' in st.session_state: + del st.session_state.documents_cache + st.session_state.refresh_documents = True + st.rerun() + + # 获取文档列表(使用缓存) + if 'documents_cache' not in st.session_state or st.session_state.get('refresh_documents', False): + with st.spinner("正在加载文档列表..."): + documents_data = list_documents() + st.session_state.documents_cache = documents_data.get('documents', []) if documents_data else [] + st.session_state.refresh_documents = False + + documents = st.session_state.documents_cache + + if documents: + # 添加批量删除按钮 + with col3: + if st.button("🗑️ 批量删除", type="primary", use_container_width=True): + selected_docs = [ + (doc['doc_id'], doc['source']) + for doc in documents + if st.session_state.get(f"select_{doc['doc_id']}", False) + ] + if selected_docs: + success_count = 0 + progress_bar = st.progress(0) + for i, (doc_id, source) in enumerate(selected_docs): + with st.spinner(f"正在删除文档 {i+1}/{len(selected_docs)}..."): + result = delete_document(doc_id, source) + if result: + success_count += 1 + progress_bar.progress((i + 1) / len(selected_docs)) + st.success(f"✅ 成功删除 {success_count}/{len(selected_docs)} 个文档!") + # 清除缓存的文档列表并标记需要刷新 + if 'documents_cache' in st.session_state: + del st.session_state.documents_cache + st.session_state.refresh_documents = True + # 清除所有选择状态 + for doc in documents: + if hasattr(st.session_state, f"select_{doc['doc_id']}"): + delattr(st.session_state, f"select_{doc['doc_id']}") + st.rerun() + else: + st.warning("请先选择要删除的文档") + + # 定义来源显示映射 + source_display = { + "custom": "自定义文档", + "builtin": "内置文档" + } + + for doc in documents: + source_text = source_display.get(doc['source'], doc['source']) + col_checkbox, col_expander = st.columns([0.5, 11.5]) + + with col_checkbox: + st.checkbox("", key=f"select_{doc['doc_id']}", value=False) + + with col_expander: + with st.expander(f"📄 {doc['title']} ({source_text})"): 
+ col1, col2 = st.columns([3, 1]) + + with col1: + st.write(f"**文档ID:** {doc['doc_id']}") + st.write(f"**来源:** {source_text}") + st.write(f"**标签:** {', '.join(doc['tags']) if doc['tags'] else '无'}") + st.write(f"**块数量:** {doc['chunk_count']}") + + with col2: + if st.button("删除", key=f"delete_{doc['doc_id']}"): + with st.spinner("正在删除文档..."): + result = delete_document(doc['doc_id'], doc['source']) + if result: + st.success("✅ 文档删除成功!") + # 清除缓存的文档列表并标记需要刷新 + if 'documents_cache' in st.session_state: + del st.session_state.documents_cache + st.session_state.refresh_documents = True + st.rerun() + else: + st.info("暂无文档") + +# 文档搜索页面 +elif page == "🔍 文档搜索": + st.header("文档搜索") + + col1, col2 = st.columns([3, 1]) + + with col1: + query = st.text_input("搜索查询", placeholder="输入搜索关键词...") + + with col2: + n_results = st.number_input("结果数量", min_value=1, max_value=20, value=5) + source = st.selectbox("搜索范围", ["全部", "自定义文档", "内置文档"]) + similarity_threshold = st.slider( + "相似度阈值", + min_value=0.0, + max_value=1.0, + value=0.0, + step=0.05, + help="只显示相似度大于等于该值的结果" + ) + + # 映射source值 + source_mapping = { + "全部": "all", + "自定义文档": "custom", + "内置文档": "builtin" + } + source = source_mapping[source] + + if st.button("搜索") and query: + # 使用缓存的搜索结果 + cache_key = f"search_results_{query}_{n_results}_{source}_{similarity_threshold}" + if cache_key not in st.session_state: + with st.spinner("正在搜索相关文档..."): + results = search_documents(query, n_results, source, similarity_threshold) + st.session_state[cache_key] = results + else: + results = st.session_state[cache_key] + + if results and results['results']: + # 过滤掉相似度低于阈值的结果 + filtered_results = [r for r in results['results'] if r['score'] >= similarity_threshold] + + if filtered_results: + st.write(f"找到 {len(filtered_results)} 个相关结果 (相似度阈值: {similarity_threshold:.2f}):") + + # 定义来源映射 + source_display = { + "custom": "自定义文档", + "builtin": "内置文档" + } + + for i, result in enumerate(filtered_results): + with st.expander(f"结果 {i+1}: 
{result['metadata']['title']} (相似度: {result['score']:.3f})"): + st.write(f"**来源:** {source_display.get(result['source'], result['source'])}") + st.write(f"**标签:** {result['metadata'].get('tags', '').replace(',', ', ')}") + st.write("**内容:**") + st.text(result['content']) + else: + st.info(f"未找到相似度大于等于 {similarity_threshold:.2f} 的结果") + else: + st.info("未找到相关结果") + +# 文档上传页面 +elif page == "📤 文档上传": + st.header("📤 文档上传") + + st.write("上传文档到知识库,用于RAG增强的代码审查。支持的文件类型:") + st.write("• **Markdown文档**: .md (推荐)") + st.write("• **文本文档**: .txt") + st.write("") + st.info("💡 **建议上传内容**: 代码规范、最佳实践、设计文档、架构说明、API文档等") + + st.divider() + + uploaded_file = st.file_uploader("选择要上传的文档", type=['txt', 'md']) + + if uploaded_file is not None: + st.success("✅ 文件已选择") + + # 显示文件信息 + st.write("**文件信息:**") + st.write(f"- 文件名: {uploaded_file.name}") + st.write(f"- 文件大小: {uploaded_file.size / 1024:.2f} KB") + st.write(f"- 文件类型: {uploaded_file.type}") + + st.divider() + + # 添加标签输入 + st.write("**文档标签** (可选)") + st.write("添加标签有助于文档分类和检索,多个标签用逗号分隔") + tags = st.text_input("标签 (用逗号分隔)", placeholder="例如: python, 代码规范, 最佳实践") + tags = [tag.strip() for tag in tags.split(",")] if tags else [] + + if tags: + st.write("**已添加标签:**") + for tag in tags: + st.write(f"• {tag}") + + st.divider() + + col1, col2, col3 = st.columns([1, 1, 1]) + with col2: + if st.button("📤 上传文档", type="primary", use_container_width=True): + with st.spinner("正在上传文档..."): + # 读取文件内容 + content = uploaded_file.read().decode('utf-8') + + # 上传文档 + result = upload_document(uploaded_file.name, content, tags) + + if result: + st.success("✅ 文档上传成功!") + st.balloons() + # 清除文档列表缓存,确保文档管理页面显示最新数据 + if 'documents_cache' in st.session_state: + del st.session_state.documents_cache + st.session_state.refresh_documents = True + st.rerun() + else: + st.error("❌ 文档上传失败,请重试!") + +# RAG测试页面 +elif page == "🧪 RAG测试": + st.header("RAG功能测试") + + st.write("输入代码片段,测试RAG增强的代码审查功能:") + + # 预设示例代码 + examples = { + "HTML示例": { + "code": HTML_EXAMPLE, + 
"commit": "实现响应式导航菜单组件,支持移动端适配" + }, + "CSS示例": { + "code": CSS_EXAMPLE, + "commit": "添加暗色主题样式,实现主题切换功能" + }, + "JavaScript示例": { + "code": JS_EXAMPLE, + "commit": "实现用户数据异步获取函数,包含错误处理和状态检查" + }, + "Java示例": { + "code": JAVA_EXAMPLE, + "commit": "实现用户创建服务,包含用户名查重和数据持久化功能" + }, + "Python示例": { + "code": PYTHON_EXAMPLE, + "commit": "实现购物车商品总价计算函数,支持批量计算" + }, + "C++示例": { + "code": CPP_EXAMPLE, + "commit": "实现线程安全的队列模板类,支持并发操作" + }, + "Go示例": { + "code": GO_EXAMPLE, + "commit": "实现消息处理函数,支持上下文控制和错误处理" + } + } + + # 示例选择器 + st.subheader("💡 快速开始 - 选择示例代码") + + # 示例说明 + example_descriptions = { + "HTML示例": "🌐 测试HTML结构和语义化标签使用 - 包含响应式布局、可访问性等", + "CSS示例": "🎨 测试CSS样式规范 - 包含响应式设计、布局结构、命名规范等", + "JavaScript示例": "📱 测试JavaScript交互逻辑 - 包含事件处理、DOM操作、性能优化等", + "Java示例": "☕ 测试Java代码规范 - 包含面向对象设计、异常处理、CRUD操作等", + "Python示例": "🐍 测试Python代码规范 - 包含数据库操作、类型注解、异常处理等", + "C++示例": "⚡ 测试C++代码规范 - 包含内存管理、智能指针、并发安全等", + "Go示例": "🔄 测试Go代码规范 - 包含并发处理、错误处理、接口设计等" + } + + # 调整整体布局比例,给按钮区域更多空间 + col1, col2 = st.columns([2.2, 1]) + + with col1: + selected_example = st.selectbox( + "选择示例", + options=list(examples.keys()), + help="选择一个预设的示例代码来测试RAG增强的代码审查功能" + ) + + # 显示选中示例的描述 + if selected_example in example_descriptions: + st.info(example_descriptions[selected_example]) + + with col2: + # 两个按钮等宽排列 + col2_1, col2_2 = st.columns(2) + with col2_1: + if st.button("使用此示例", type="primary", use_container_width=True): + st.session_state.example_code = examples[selected_example]["code"] + st.session_state.example_commit = examples[selected_example]["commit"] + st.rerun() + + with col2_2: + if st.button("清空代码", use_container_width=True): + if hasattr(st.session_state, 'example_code'): + del st.session_state.example_code + if hasattr(st.session_state, 'example_commit'): + del st.session_state.example_commit + st.rerun() + + # 获取代码内容 + default_code = "" + default_commit = "" + + if hasattr(st.session_state, 'example_code'): + default_code = st.session_state.example_code + default_commit = 
st.session_state.example_commit + + st.subheader("🔧 代码审查") + + code = st.text_area( + "代码内容", + value=default_code, + height=300, + placeholder="输入要审查的代码,或使用上面的示例...", + help="输入需要进行代码审查的代码内容" + ) + + commit_message = st.text_area( + "代码功能说明 (可选)", + value=default_commit, # 使用default_commit作为默认值 + placeholder="请简要说明这段代码的主要功能和目的...", + height=100 + ) + + # 添加模型参数控制区域 + st.subheader("⚙️ 模型参数设置") + + param_col1, param_col2, param_col3 = st.columns(3) + + # 初始化重置计数器 + if 'reset_counter' not in st.session_state: + st.session_state.reset_counter = 0 + + # 重置参数按钮 + with param_col3: + # 重置参数按钮 + if st.button("🔄 重置参数", key="reset_params_btn", use_container_width=True): + # 增加重置计数器,强制滑块重新初始化 + st.session_state.reset_counter += 1 + st.rerun() + + with param_col1: + # 温度控制滑块 - 使用动态key + temperature = st.slider( + "🌡️ 模型温度", + min_value=0.0, + max_value=2.0, + value=0.3, # 直接使用默认值 + step=0.1, + key=f"temperature_slider_{st.session_state.reset_counter}", + help="控制AI输出的随机性:\n• 0.0-0.3: 确定性高,适合代码审查\n• 0.4-0.7: 平衡创造性和一致性\n• 0.8-2.0: 创造性高,输出更随机" + ) + + # 显示温度说明 + if temperature <= 0.3: + st.info("🎯 确定性模式:输出稳定一致") + elif temperature <= 0.7: + st.info("⚖️ 平衡模式:平衡创造性和一致性") + else: + st.info("🎨 创造性模式:输出更具创造性") + + with param_col2: + # 相似度阈值控制 - 使用动态key + similarity_threshold = st.slider( + "📊 相似度阈值", + min_value=0.0, + max_value=1.0, + value=0.2, # 直接使用默认值 + step=0.05, + key=f"similarity_slider_{st.session_state.reset_counter}", + help="控制检索文档的相关性:\n• 0.0: 显示所有检索结果\n• 0.2-0.5: 显示相关度较高的文档\n• 0.6-1.0: 只显示高度相关的文档" + ) + + # 显示当前参数状态 + with param_col3: + # 显示当前参数状态 + st.markdown("**当前参数设置:**") + st.markdown(f"• 温度: **{temperature}**") + st.markdown(f"• 相似度阈值: **{similarity_threshold}**") + + # 测试按钮区域 + col1, col2 = st.columns(2) + + with col1: + if st.button("🧪 RAG测试", type="primary", use_container_width=True): + if code.strip(): + with st.spinner("正在进行RAG增强的代码审查..."): + result = test_rag(code, commit_message, temperature, similarity_threshold) + + if result: + st.success("RAG测试完成!") + + 
# 存储结果到session state + st.session_state.rag_result = result + st.session_state.current_code = code + st.session_state.current_commit = commit_message + + # 清除对比结果的显示标记,确保只显示RAG测试结果 + if hasattr(st.session_state, 'show_compare_result'): + del st.session_state.show_compare_result + + # 显示相关文档 + st.subheader("📚 检索到的相关文档") + if result['relevant_docs']: + # 按文档分段显示 + docs = result['relevant_docs'].split('###') + for i, doc in enumerate(docs): + if doc.strip(): # 跳过空文档 + # 提取标题和相似度 + lines = doc.strip().split('\n') + if lines: + title_line = lines[0] + content = '\n'.join(lines[1:]) + with st.expander(f"📄 {title_line}"): + st.text(content) + else: + st.info("未找到相关文档") + + # 显示审查结果 + st.subheader("🔍 RAG审查结果") + st.markdown(result['review_result']) + + # 显示评分 + st.metric("RAG审查评分", f"{result['score']}/100") + else: + st.error("请输入代码内容") + + with col2: + if st.button("📊 普通/RAG模式对比", help="对比使用和不使用RAG(检索增强生成)的两种审查模式的效果差异", use_container_width=True): + if code.strip(): + with st.spinner("正在对比两种审查模式的效果..."): + result = compare_rag(code, commit_message, temperature, similarity_threshold) + + if result: + st.success("审查模式对比完成!") + + # 将结果存储到session state中,以便在整个页面宽度显示 + st.session_state.compare_result = result + st.session_state.current_code = code + st.session_state.current_commit = commit_message + st.session_state.show_compare_result = True # 标记显示对比结果 + st.rerun() + else: + st.error("请输入代码内容") + + # 导出功能区域 + if hasattr(st.session_state, 'rag_result') or hasattr(st.session_state, 'compare_result'): + st.subheader("📤 导出审查报告") + + export_col1, export_col2, export_col3 = st.columns([2, 2, 2]) + + with export_col1: + if hasattr(st.session_state, 'rag_result'): + # 生成文件名 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"rag_code_review_{timestamp}.md" + + # 直接提供下载按钮 + markdown_content = generate_markdown_report( + st.session_state.rag_result, + st.session_state.current_code, + st.session_state.current_commit, + "RAG" + ) + + st.download_button( + label="📄 
导出RAG报告", + data=markdown_content, + file_name=filename, + mime="text/markdown", + use_container_width=True + ) + + with export_col2: + if hasattr(st.session_state, 'compare_result'): + # 生成文件名 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"compare_code_review_{timestamp}.md" + + # 直接提供下载按钮 + markdown_content = generate_markdown_report( + st.session_state.compare_result, + st.session_state.current_code, + st.session_state.current_commit, + "对比" + ) + + st.download_button( + label="📊 导出对比报告", + data=markdown_content, + file_name=filename, + mime="text/markdown", + use_container_width=True + ) + + with export_col3: + if st.button("🗑️ 清除结果", use_container_width=True): + # 清除所有结果 + for key in ['rag_result', 'compare_result', 'current_code', 'current_commit', 'show_compare_result']: + if hasattr(st.session_state, key): + delattr(st.session_state, key) + st.rerun() + + # 如果有对比结果,在整个页面宽度显示 + # 只有当用户明确点击了对比按钮时才显示对比内容 + if hasattr(st.session_state, 'compare_result') and st.session_state.get('show_compare_result', False): + result = st.session_state.compare_result + + # 显示对比统计 - 占满整个宽度 + st.subheader("📈 对比统计") + comp_col1, comp_col2, comp_col3 = st.columns(3) + + with comp_col1: + st.metric( + "评分差异", + f"{result['comparison']['score_difference']:+d}", + help="RAG增强审查评分与普通审查评分的差值" + ) + + with comp_col2: + st.metric( + "检索文档数", + result['comparison']['unique_docs_count'] + ) + + with comp_col3: + has_docs = "✅" if result['comparison']['has_relevant_docs'] else "❌" + st.metric("找到相关文档", has_docs) + + # 显示检索到的相关文档 + if result['rag_result']['relevant_docs']: + st.subheader("📚 检索到的相关文档") + # 按文档分段显示 + docs = result['rag_result']['relevant_docs'].split('###') + for i, doc in enumerate(docs): + if doc.strip(): # 跳过空文档 + # 提取标题和相似度 + lines = doc.strip().split('\n') + if lines: + title_line = lines[0] + content = '\n'.join(lines[1:]) + with st.expander(f"📄 {title_line}"): + st.text(content) + + # 并排显示两个审查结果 + st.subheader("🔍 审查结果对比") + + result_col1, 
result_col2 = st.columns(2) + + with result_col1: + st.markdown("### 🤖 RAG增强审查") + st.metric("评分", f"{result['rag_result']['score']}/100") + st.markdown(result['rag_result']['review_result']) + + with result_col2: + st.markdown("### 🔧 普通模型审查") + st.metric("评分", f"{result['normal_result']['score']}/100") + st.markdown(result['normal_result']['review_result']) + + if result['comparison']['has_relevant_docs']: + st.info("📖 系统找到了相关的技术文档,这些文档被用于增强审查结果") + else: + st.warning("📭 系统未找到相关的技术文档,建议添加更多相关的编码规范文档") + +# 批量文件审查页面 +elif page == "📁 批量文件审查": + st.header("📁 批量文件审查") + + st.write("上传多个代码文件,批量进行RAG增强的代码审查。支持多种编程语言:") + st.write("• **Python**: .py") + st.write("• **Java**: .java") + st.write("• **C++**: .cpp, .cc, .cxx, .h, .hpp") + st.write("• **JavaScript**: .js, .ts, .jsx, .tsx") + st.write("• **Go**: .go") + st.write("• **其他**: .txt, .md") + + st.divider() + + # 文件上传区域 + uploaded_files = st.file_uploader( + "选择要审查的代码文件", + type=['py', 'java', 'cpp', 'cc', 'cxx', 'h', 'hpp', 'js', 'ts', 'jsx', 'tsx', 'go', 'txt', 'md'], + accept_multiple_files=True, + help="可以同时选择多个文件进行批量审查" + ) + + if uploaded_files: + st.success(f"✅ 已选择 {len(uploaded_files)} 个文件") + + # 显示文件信息 + st.subheader("📋 文件信息") + file_info = [] + for i, file in enumerate(uploaded_files): + file_info.append({ + 'index': i + 1, + 'name': file.name, + 'size': file.size, + 'type': file.type or '未知', + 'extension': file.name.split('.')[-1].lower() if '.' 
in file.name else '无扩展名' + }) + + # 创建文件信息表格 + import pandas as pd + df = pd.DataFrame(file_info) + st.dataframe(df, use_container_width=True) + + st.divider() + + # 批量审查设置 + st.subheader("⚙️ 批量审查设置") + + # 模型参数设置 + param_col1, param_col2, param_col3 = st.columns(3) + + with param_col1: + temperature = st.slider( + "🌡️ 模型温度", + min_value=0.0, + max_value=2.0, + value=0.3, + step=0.1, + help="控制AI输出的随机性" + ) + + with param_col2: + similarity_threshold = st.slider( + "📊 相似度阈值", + min_value=0.0, + max_value=1.0, + value=0.2, + step=0.05, + help="控制检索文档的相关性" + ) + + with param_col3: + review_mode = st.selectbox( + "🔍 审查模式", + ["RAG测试", "RAG/普通对比"], + help="选择审查模式:仅RAG测试或对比两种模式" + ) + + # 提交信息设置 + st.write("**提交信息设置**") + commit_mode = st.radio( + "提交信息模式", + ["使用文件名作为提交信息", "自定义统一提交信息", "为每个文件单独设置"], + help="选择如何为文件设置提交信息" + ) + + custom_commit = "" + if commit_mode == "自定义统一提交信息": + custom_commit = st.text_area( + "统一提交信息", + placeholder="请输入统一的提交信息...", + height=80 + ) + + # 文件提交信息映射 + file_commits = {} + if commit_mode == "为每个文件单独设置": + st.write("**为每个文件设置提交信息:**") + for file in uploaded_files: + commit = st.text_input( + f"文件 {file.name} 的提交信息", + placeholder="请输入提交信息...", + key=f"commit_{file.name}" + ) + file_commits[file.name] = commit + + st.divider() + + # 开始批量审查 + if st.button("🚀 开始批量审查", type="primary", use_container_width=True): + if not uploaded_files: + st.error("请先选择要审查的文件") + else: + # 初始化进度 + progress_bar = st.progress(0) + status_text = st.empty() + + # 存储所有审查结果 + all_results = [] + + for i, file in enumerate(uploaded_files): + try: + # 更新进度 + progress = (i + 1) / len(uploaded_files) + progress_bar.progress(progress) + status_text.text(f"正在审查文件 {i+1}/{len(uploaded_files)}: {file.name}") + + # 读取文件内容 + content = file.read().decode('utf-8') + + # 确定提交信息 + if commit_mode == "使用文件名作为提交信息": + commit_message = f"文件: {file.name}" + elif commit_mode == "自定义统一提交信息": + commit_message = custom_commit + else: # 为每个文件单独设置 + commit_message = 
file_commits.get(file.name, f"文件: {file.name}") + + # 根据模式进行审查 + if review_mode == "RAG测试": + result = test_rag(content, commit_message, temperature, similarity_threshold) + if result: + all_results.append({ + 'file_name': file.name, + 'file_size': file.size, + 'commit_message': commit_message, + 'mode': 'RAG测试', + 'result': result, + 'success': True + }) + else: + all_results.append({ + 'file_name': file.name, + 'file_size': file.size, + 'commit_message': commit_message, + 'mode': 'RAG测试', + 'result': None, + 'success': False, + 'error': '审查失败' + }) + else: # RAG/普通对比 + result = compare_rag(content, commit_message, temperature, similarity_threshold) + if result: + all_results.append({ + 'file_name': file.name, + 'file_size': file.size, + 'commit_message': commit_message, + 'mode': 'RAG/普通对比', + 'result': result, + 'success': True + }) + else: + all_results.append({ + 'file_name': file.name, + 'file_size': file.size, + 'commit_message': commit_message, + 'mode': 'RAG/普通对比', + 'result': None, + 'success': False, + 'error': '审查失败' + }) + + # 重置文件指针,以便后续可能的重新读取 + file.seek(0) + + except Exception as e: + all_results.append({ + 'file_name': file.name, + 'file_size': file.size, + 'commit_message': commit_message if 'commit_message' in locals() else f"文件: {file.name}", + 'mode': review_mode, + 'result': None, + 'success': False, + 'error': str(e) + }) + + # 完成进度 + progress_bar.progress(1.0) + status_text.text("批量审查完成!") + + # 存储结果到session state + st.session_state.batch_results = all_results + st.session_state.batch_files = uploaded_files + + st.success(f"✅ 批量审查完成! 
成功审查 {len([r for r in all_results if r['success']])}/{len(all_results)} 个文件") + + # 显示结果摘要 + st.subheader("📊 审查结果摘要") + + success_count = len([r for r in all_results if r['success']]) + failed_count = len(all_results) - success_count + + col1, col2, col3 = st.columns(3) + with col1: + st.metric("总文件数", len(all_results)) + with col2: + st.metric("成功审查", success_count) + with col3: + st.metric("审查失败", failed_count) + + # 显示详细结果 + st.subheader("📋 详细审查结果") + + for i, result in enumerate(all_results): + with st.expander(f"📄 {result['file_name']} ({'✅ 成功' if result['success'] else '❌ 失败'})"): + if result['success']: + if result['mode'] == "RAG测试": + st.write(f"**提交信息:** {result['commit_message']}") + st.write(f"**文件大小:** {result['file_size']} 字节") + st.metric("RAG审查评分", f"{result['result']['score']}/100") + st.write("**审查结果:**") + st.markdown(result['result']['review_result']) + + # 显示相关文档 + if result['result']['relevant_docs']: + st.write("**相关文档:**") + st.text(result['result']['relevant_docs']) + else: # RAG/普通对比 + st.write(f"**提交信息:** {result['commit_message']}") + st.write(f"**文件大小:** {result['file_size']} 字节") + + # 对比统计 + comp_col1, comp_col2, comp_col3 = st.columns(3) + with comp_col1: + st.metric("评分差异", f"{result['result']['comparison']['score_difference']:+d}") + with comp_col2: + st.metric("检索文档数", result['result']['comparison']['unique_docs_count']) + with comp_col3: + has_docs = "✅" if result['result']['comparison']['has_relevant_docs'] else "❌" + st.metric("找到相关文档", has_docs) + + # 并排显示结果 + result_col1, result_col2 = st.columns(2) + with result_col1: + st.markdown("**RAG增强审查:**") + st.metric("评分", f"{result['result']['rag_result']['score']}/100") + st.markdown(result['result']['rag_result']['review_result']) + with result_col2: + st.markdown("**普通模型审查:**") + st.metric("评分", f"{result['result']['normal_result']['score']}/100") + st.markdown(result['result']['normal_result']['review_result']) + else: + st.error(f"审查失败: {result.get('error', '未知错误')}") + + # 
导出功能 + st.subheader("📤 导出批量审查报告") + + if st.button("📄 导出批量审查报告", use_container_width=True): + # 生成批量审查报告 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"batch_code_review_{timestamp}.md" + + # 生成报告内容 + report_content = f"""# 批量代码审查报告 + +## 基本信息 +- **生成时间**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} +- **审查模式**: {review_mode} +- **总文件数**: {len(all_results)} +- **成功审查**: {success_count} +- **审查失败**: {failed_count} + +## 模型参数 +- **模型温度**: {temperature} +- **相似度阈值**: {similarity_threshold} + +## 审查结果 + +""" + + for result in all_results: + report_content += f""" +### {result['file_name']} + +**状态**: {'✅ 成功' if result['success'] else '❌ 失败'} +**提交信息**: {result['commit_message']} +**文件大小**: {result['file_size']} 字节 + +""" + + if result['success']: + if result['mode'] == "RAG测试": + report_content += f""" +**RAG审查评分**: {result['result']['score']}/100 + +**审查结果**: +{result['result']['review_result']} + +""" + if result['result']['relevant_docs']: + report_content += f""" +**相关文档**: +{result['result']['relevant_docs']} + +""" + else: # RAG/普通对比 + report_content += f""" +**对比统计**: +- 评分差异: {result['result']['comparison']['score_difference']:+d} +- 检索文档数: {result['result']['comparison']['unique_docs_count']} +- 找到相关文档: {'是' if result['result']['comparison']['has_relevant_docs'] else '否'} + +**RAG增强审查结果**: +评分: {result['result']['rag_result']['score']}/100 + +{result['result']['rag_result']['review_result']} + +**普通模型审查结果**: +评分: {result['result']['normal_result']['score']}/100 + +{result['result']['normal_result']['review_result']} + +""" + else: + report_content += f""" +**错误信息**: {result.get('error', '未知错误')} + +""" + + report_content += f""" +--- +*报告由AI代码审查系统自动生成* +""" + + # 提供下载 + st.download_button( + label="📥 下载批量审查报告", + data=report_content, + file_name=filename, + mime="text/markdown", + use_container_width=True + ) + + # 清除结果按钮 + if st.button("🗑️ 清除批量审查结果", use_container_width=True): + if 'batch_results' in st.session_state: + del 
st.session_state.batch_results + if 'batch_files' in st.session_state: + del st.session_state.batch_files + st.rerun() + +# 页面底部信息 +st.markdown("---") +st.markdown("💡 **使用说明:**") +st.markdown(""" +- **状态总览**: 查看知识库的整体状态和统计信息 +- **文档管理**: 查看和管理已上传的文档 +- **文档搜索**: 基于语义搜索查找相关文档 +- **上传文档**: 添加自定义技术文档到知识库 +- **RAG测试**: + - **🧪 RAG测试**: 测试基于知识库的代码审查功能 + - **📊 RAG对比测试**: 同时进行RAG和普通模型审查,直观对比两种方式的差异 + - **📤 导出功能**: 将审查结果导出为Markdown格式的报告 + - 提供多种编程语言的示例代码供快速体验 + - 支持自定义代码输入和提交信息 +- **批量文件审查**: + - **📁 批量上传**: 支持同时上传多个不同编程语言的文件 + - **🔍 批量审查**: 可选择RAG测试或RAG/普通对比模式 + - **📊 结果汇总**: 显示批量审查的统计信息和详细结果 + - **📤 报告导出**: 生成包含所有文件审查结果的综合报告 + - 支持多种提交信息设置模式 +""") + +# 添加模型信息 +st.markdown("---") +st.markdown("🤖 **AI模型信息:**") +st.markdown(""" +本系统使用多种AI模型进行代码审查: +- **RAG增强模型**: 结合知识库检索的智能代码审查 +- **基础模型**: 纯AI模型的代码审查 +- **支持模型**: OpenAI GPT系列、DeepSeek、Qwen、智谱AI等 +""") \ No newline at end of file From 3967b14c87faef21b94262bb93fcb0d611751f04 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:26:22 +0800 Subject: [PATCH 14/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=9C=A8Push=E4=BA=8B?= =?UTF-8?q?=E4=BB=B6=E5=92=8CMerge=20Request=E4=BA=8B=E4=BB=B6=E4=B8=AD?= =?UTF-8?q?=E9=9B=86=E6=88=90RAG=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- biz/queue/worker.py | 130 +++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 68 deletions(-) diff --git a/biz/queue/worker.py b/biz/queue/worker.py index 169648e52..071621301 100644 --- a/biz/queue/worker.py +++ b/biz/queue/worker.py @@ -7,6 +7,7 @@ from biz.gitlab.webhook_handler import filter_changes, MergeRequestHandler, PushHandler from biz.github.webhook_handler import filter_changes as filter_github_changes, PullRequestHandler as GithubPullRequestHandler, PushHandler as GithubPushHandler from biz.utils.code_reviewer import CodeReviewer +from biz.utils.rag_code_reviewer import RAGCodeReviewer from biz.utils.im 
import notifier from biz.utils.log import logger @@ -19,45 +20,55 @@ def handle_push_event(webhook_data: dict, gitlab_token: str, gitlab_url: str, gi logger.info('Push Hook event received') commits = handler.get_push_commits() if not commits: - logger.error('Failed to get commits') + logger.info('No commits found in push event (likely branch creation/deletion)') return review_result = None score = 0 - additions = 0 - deletions = 0 + should_record = False # 是否应该记录到数据库 + if push_review_enabled: # 获取PUSH的changes changes = handler.get_push_changes() logger.info('changes: %s', changes) changes = filter_changes(changes) + if not changes: logger.info('未检测到PUSH代码的修改,修改文件可能不满足SUPPORTED_EXTENSIONS。') - review_result = "关注的文件没有修改" - - if len(changes) > 0: + review_result = "关注的文件没有修改" + # 如果没有代码变更,不记录到数据库 + should_record = False + else: + should_record = True # 有代码变更才记录 commits_text = ';'.join(commit.get('message', '').strip() for commit in commits) - review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text) - score = CodeReviewer.parse_review_score(review_text=review_result) - for item in changes: - additions += item['additions'] - deletions += item['deletions'] - # 将review结果提交到Gitlab的 notes - handler.add_push_notes(f'Auto Review Result: \n{review_result}') + # 使用RAG增强的代码审查器 + enable_rag = os.environ.get('ENABLE_RAG', '1') == '1' + if enable_rag: + reviewer = RAGCodeReviewer() + review_result = reviewer.review_and_strip_code(str(changes), commits_text) + score = reviewer.parse_review_score(review_text=review_result) + else: + review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text) + score = CodeReviewer.parse_review_score(review_text=review_result) + # 将review结果提交到Gitlab的 notes + handler.add_push_notes(f'Auto Review Result: \n{review_result}') - event_manager['push_reviewed'].send(PushReviewEntity( - project_name=webhook_data['project']['name'], - author=webhook_data['user_username'], - 
branch=webhook_data['project']['default_branch'], - updated_at=int(datetime.now().timestamp()), # 当前时间 - commits=commits, - score=score, - review_result=review_result, - url_slug=gitlab_url_slug, - webhook_data=webhook_data, - additions=additions, - deletions=deletions, - )) + # 只有在有代码变更时才记录到数据库 + if should_record: + # 获取第一个commit的URL作为推送记录的URL + push_url = commits[0].get('url', '') if commits else '' + + event_manager['push_reviewed'].send(PushReviewEntity( + project_name=webhook_data['project']['name'], + author=webhook_data['user_username'], + branch=webhook_data['project']['default_branch'], + updated_at=int(datetime.now().timestamp()), # 当前时间 + commits=commits, + score=score, + review_result=review_result, + url_slug=gitlab_url_slug, + url=push_url, + )) except Exception as e: error_message = f'服务出现未知错误: {str(e)}\n{traceback.format_exc()}' @@ -74,15 +85,10 @@ def handle_merge_request_event(webhook_data: dict, gitlab_token: str, gitlab_url :param gitlab_url_slug: :return: ''' - merge_review_only_protected_branches = os.environ.get('MERGE_REVIEW_ONLY_PROTECTED_BRANCHES_ENABLED', '0') == '1' try: # 解析Webhook数据 handler = MergeRequestHandler(webhook_data, gitlab_token, gitlab_url) logger.info('Merge Request Hook event received') - # 如果开启了仅review projected branches的,判断当前目标分支是否为projected branches - if merge_review_only_protected_branches and not handler.target_branch_protected(): - logger.info("Merge Request target branch not match protected branches, ignored.") - return if handler.action not in ['open', 'update']: logger.info(f"Merge Request Hook event, action={handler.action}, ignored.") @@ -96,12 +102,6 @@ def handle_merge_request_event(webhook_data: dict, gitlab_token: str, gitlab_url if not changes: logger.info('未检测到有关代码的修改,修改文件可能不满足SUPPORTED_EXTENSIONS。') return - # 统计本次新增、删除的代码总数 - additions = 0 - deletions = 0 - for item in changes: - additions += item.get('additions', 0) - deletions += item.get('deletions', 0) # 获取Merge Request的commits commits = 
handler.get_merge_request_commits() @@ -110,8 +110,16 @@ def handle_merge_request_event(webhook_data: dict, gitlab_token: str, gitlab_url return # review 代码 - commits_text = ';'.join(commit['title'] for commit in commits) - review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text) + commits_text = ';'.join(commit['message'] for commit in commits) + # 使用RAG增强的代码审查器 + enable_rag = os.environ.get('ENABLE_RAG', '1') == '1' + if enable_rag: + reviewer = RAGCodeReviewer() + review_result = reviewer.review_and_strip_code(str(changes), commits_text) + score = reviewer.parse_review_score(review_text=review_result) + else: + review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text) + score = CodeReviewer.parse_review_score(review_text=review_result) # 将review结果提交到Gitlab的 notes handler.add_merge_request_notes(f'Auto Review Result: \n{review_result}') @@ -125,13 +133,10 @@ def handle_merge_request_event(webhook_data: dict, gitlab_token: str, gitlab_url target_branch=webhook_data['object_attributes']['target_branch'], updated_at=int(datetime.now().timestamp()), commits=commits, - score=CodeReviewer.parse_review_score(review_text=review_result), + score=score, url=webhook_data['object_attributes']['url'], review_result=review_result, url_slug=gitlab_url_slug, - webhook_data=webhook_data, - additions=additions, - deletions=deletions, ) ) @@ -152,8 +157,6 @@ def handle_github_push_event(webhook_data: dict, github_token: str, github_url: review_result = None score = 0 - additions = 0 - deletions = 0 if push_review_enabled: # 获取PUSH的changes changes = handler.get_push_changes() @@ -165,14 +168,21 @@ def handle_github_push_event(webhook_data: dict, github_token: str, github_url: if len(changes) > 0: commits_text = ';'.join(commit.get('message', '').strip() for commit in commits) - review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text) - score = CodeReviewer.parse_review_score(review_text=review_result) - for item 
in changes: - additions += item.get('additions', 0) - deletions += item.get('deletions', 0) + # 使用RAG增强的代码审查器 + enable_rag = os.environ.get('ENABLE_RAG', '1') == '1' + if enable_rag: + reviewer = RAGCodeReviewer() + review_result = reviewer.review_and_strip_code(str(changes), commits_text) + score = reviewer.parse_review_score(review_text=review_result) + else: + review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text) + score = CodeReviewer.parse_review_score(review_text=review_result) # 将review结果提交到GitHub的 notes handler.add_push_notes(f'Auto Review Result: \n{review_result}') + # 获取第一个commit的URL作为推送记录的URL + push_url = commits[0].get('url', '') if commits else '' + event_manager['push_reviewed'].send(PushReviewEntity( project_name=webhook_data['repository']['name'], author=webhook_data['sender']['login'], @@ -182,9 +192,7 @@ def handle_github_push_event(webhook_data: dict, github_token: str, github_url: score=score, review_result=review_result, url_slug=github_url_slug, - webhook_data=webhook_data, - additions=additions, - deletions=deletions, + url=push_url, )) except Exception as e: @@ -202,15 +210,10 @@ def handle_github_pull_request_event(webhook_data: dict, github_token: str, gith :param github_url_slug: :return: ''' - merge_review_only_protected_branches = os.environ.get('MERGE_REVIEW_ONLY_PROTECTED_BRANCHES_ENABLED', '0') == '1' try: # 解析Webhook数据 handler = GithubPullRequestHandler(webhook_data, github_token, github_url) logger.info('GitHub Pull Request event received') - # 如果开启了仅review projected branches的,判断当前目标分支是否为projected branches - if merge_review_only_protected_branches and not handler.target_branch_protected(): - logger.info("Merge Request target branch not match protected branches, ignored.") - return if handler.action not in ['opened', 'synchronize']: logger.info(f"Pull Request Hook event, action={handler.action}, ignored.") @@ -224,12 +227,6 @@ def handle_github_pull_request_event(webhook_data: dict, github_token: str, 
gith if not changes: logger.info('未检测到有关代码的修改,修改文件可能不满足SUPPORTED_EXTENSIONS。') return - # 统计本次新增、删除的代码总数 - additions = 0 - deletions = 0 - for item in changes: - additions += item.get('additions', 0) - deletions += item.get('deletions', 0) # 获取Pull Request的commits commits = handler.get_pull_request_commits() @@ -256,10 +253,7 @@ def handle_github_pull_request_event(webhook_data: dict, github_token: str, gith score=CodeReviewer.parse_review_score(review_text=review_result), url=webhook_data['pull_request']['html_url'], review_result=review_result, - url_slug=github_url_slug, - webhook_data=webhook_data, - additions=additions, - deletions=deletions, + url_slug=github_url_slug )) except Exception as e: From 0a58fb9d4a3cb87e13b3706fb642a64783c27a28 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:28:38 +0800 Subject: [PATCH 15/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0RAG=E4=BB=A5=E5=8F=8A?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E6=B8=A9=E5=BA=A6=E7=AD=89=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- conf/.env.dist | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/conf/.env.dist b/conf/.env.dist index 0e685330a..74d24ccb0 100644 --- a/conf/.env.dist +++ b/conf/.env.dist @@ -8,14 +8,14 @@ TZ=Asia/Shanghai LLM_PROVIDER=deepseek #DeepSeek settings -DEEPSEEK_API_KEY= +DEEPSEEK_API_KEY=sk-7f956efd0c864fd5b7b9260fc7ca459c DEEPSEEK_API_BASE_URL=https://api.deepseek.com DEEPSEEK_API_MODEL=deepseek-chat #OpenAI settings -OPENAI_API_KEY=xxxx -OPENAI_API_BASE_URL=https://api.openai.com/v1 -OPENAI_API_MODEL=gpt-4o-mini +OPENAI_API_KEY=EMPTY +OPENAI_API_BASE_URL=http://127.0.0.1:9997/v1 +OPENAI_API_MODEL=Llama2-Chinese-13b-Chat-ms #ZhipuAI settings ZHIPUAI_API_KEY=xxxx @@ -31,10 +31,16 @@ QWEN_API_MODEL=qwen-coder-plus OLLAMA_API_BASE_URL=http://host.docker.internal:11434 OLLAMA_API_MODEL=deepseek-r1:latest +# 
模型温度控制 (0.0-2.0) +# 0.0-0.3: 确定性高,适合代码审查 +# 0.4-0.7: 平衡创造性和一致性 +# 0.8-2.0: 创造性高,输出更随机 +LLM_TEMPERATURE=0.3 + #支持review的文件类型 -SUPPORTED_EXTENSIONS=.c,.cc,.cpp,.css,.go,.h,.java,.js,.jsx,.ts,.tsx,.md,.php,.py,.sql,.vue,.yml +SUPPORTED_EXTENSIONS=.c,.cc,.cpp,.css,.go,.h,.java,.js,.jsx,.ts,.tsx,.md,.php,.py,.sql,.vue,.yml,.html #每次 Review 的最大 Token 限制(超出部分自动截断) -REVIEW_MAX_TOKENS=10000 +REVIEW_MAX_TOKENS=30000 #Review 风格选项:professional(专业) | sarcastic(毒舌) | gentle(温和) | humorous(幽默) REVIEW_STYLE=professional @@ -50,11 +56,6 @@ WECOM_WEBHOOK_URL=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx FEISHU_ENABLED=0 FEISHU_WEBHOOK_URL=https://open.feishu.cn/open-apis/bot/v2/hook/xxx -#自定义webhook配置,使用场景:通过飞书发送应用消息可以实现Push评审通知到提交人,在自定义webhook里可以实现各种定制通知功能 -#参数EXTRA_WEBHOOK_URL接收POST请求,data={ai_codereview_data: {}, webhook_data: {}},ai_codereview_data为本系统通知的数据,webhook_data为原github、gitlab hook触发的数据 -EXTRA_WEBHOOK_ENABLED=0 -EXTRA_WEBHOOK_URL=https://xxx/xxx - #日志配置 LOG_FILE=log/app.log LOG_MAX_BYTES=10485760 @@ -66,25 +67,38 @@ REPORT_CRONTAB_EXPRESSION=0 18 * * 1-5 #Gitlab配置 #GITLAB_URL={YOUR_GITLAB_URL} #部分老版本Gitlab webhook不传递URL,需要开启此配置,示例:https://gitlab.example.com -#GITLAB_ACCESS_TOKEN={YOUR_GITLAB_ACCESS_TOKEN} #系统会优先使用此GITLAB_ACCESS_TOKEN,如果未配置,则使用Webhook 传递的Secret Token +GITLAB_ACCESS_TOKEN=glpat-sxihr4Ee_qym9QEqu6GW #系统会优先使用此GITLAB_ACCESS_TOKEN,如果未配置,则使用Webhook 传递的Secret Token #Github配置(如果使用 Github 作为代码托管平台,需要配置此项) #GITHUB_ACCESS_TOKEN={YOUR_GITHUB_ACCESS_TOKEN} # 开启Push Review功能(如果不需要push事件触发Code Review,设置为0) PUSH_REVIEW_ENABLED=1 -# 开启Merge请求过滤,过滤仅当合并目标分支是受保护分支时才Review(开启此选项请确保仓库已配置受保护分支protected branches) -MERGE_REVIEW_ONLY_PROTECTED_BRANCHES_ENABLED=0 # Dashboard登录用户名和密码 DASHBOARD_USER=admin -DASHBOARD_PASSWORD=admin +DASHBOARD_PASSWORD=wengqian # queue (async, rq) QUEUE_DRIVER=async -# REDIS_HOST=redis +REDIS_HOST=redis # REDIS_HOST=127.0.0.1 # REDIS_PORT=6379 # gitlab domain slugged WORKER_QUEUE=git_test_com + +# RAG功能配置 +ENABLE_RAG=1 +# 
1表示启用RAG,0表示使用原有审查方式 + +# 知识库配置 +KNOWLEDGE_BASE_PATH=data/knowledge_base +CHUNK_SIZE=1000 +CHUNK_OVERLAP=200 +SEARCH_RESULTS_LIMIT=5 +RAG_SIMILARITY_THRESHOLD=0.2 +AUTO_INIT_BUILTIN_KNOWLEDGE=0 + +# HMAC-SHA256 签名 +SECRET_KEY=fac8cf149bdd616c07c1a675c4571ccacc40d7f7fe16914cfe0f9f9d966bb773 From 571371ad4a761df3c5d35f35787e28fbb03c9637 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:29:54 +0800 Subject: [PATCH 16/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=86=85=E7=BD=AE?= =?UTF-8?q?=E7=9F=A5=E8=AF=86=E5=BA=93=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- conf/builtin_knowledge.yml | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 conf/builtin_knowledge.yml diff --git a/conf/builtin_knowledge.yml b/conf/builtin_knowledge.yml new file mode 100644 index 000000000..e71511bd1 --- /dev/null +++ b/conf/builtin_knowledge.yml @@ -0,0 +1,49 @@ +# 内置知识库配置 +# 配置内置技术文档和最佳实践 + +builtin_documents: + - title: "HTML编码规范" + file: "docs/builtin/html_standards.md" + tags: ["html", "frontend", "coding-standards"] + description: "HTML开发的编码规范,包括语义化标签、可访问性、SEO优化等" + + - title: "CSS编码规范" + file: "docs/builtin/css_standards.md" + tags: ["css", "frontend", "coding-standards"] + description: "CSS开发的编码规范,包括命名规范、响应式设计、性能优化等" + + - title: "JavaScript编码规范" + file: "docs/builtin/javascript_standards.md" + tags: ["javascript", "frontend", "coding-standards"] + description: "JavaScript开发的编码规范,包括ES6+特性、函数式编程、性能优化等" + + - title: "Java编码规范" + file: "docs/builtin/java_standards.md" + tags: ["java", "backend", "coding-standards"] + description: "Java开发的编码规范,包括SOLID原则、并发编程、性能优化等" + + - title: "Python编码规范" + file: "docs/builtin/python_standards.md" + tags: ["python", "backend", "coding-standards"] + description: "Python开发的编码规范,包括PEP 8、类型注解、最佳实践等" + + - title: "C++编码规范" + file: "docs/builtin/cpp_standards.md" + tags: ["cpp", "backend", 
"coding-standards"] + description: "C++开发的编码规范,包括内存管理、RAII、并发编程等" + + - title: "Go编码规范" + file: "docs/builtin/go_standards.md" + tags: ["go", "backend", "coding-standards"] + description: "Go开发的编码规范,包括并发安全、错误处理、接口设计等" + +# 配置参数 +settings: + # 是否启用内置知识库 + enabled: true + + # 自动初始化(首次启动时加载内置文档) + auto_init: true + + # 文档编码格式 + encoding: "utf-8" \ No newline at end of file From 6900db93bdb5cc1f718fff79fb12931f9192bf92 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:30:41 +0800 Subject: [PATCH 17/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0RAG=E6=8F=90=E7=A4=BA?= =?UTF-8?q?=E8=AF=8D=E6=A8=A1=E6=9D=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- conf/prompt_templates.yml | 82 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/conf/prompt_templates.yml b/conf/prompt_templates.yml index 66258255a..7b2f9fd74 100644 --- a/conf/prompt_templates.yml +++ b/conf/prompt_templates.yml @@ -41,3 +41,85 @@ code_review_prompt: 提交历史(commits): {commits_text} + +rag_code_review_prompt: + system_prompt: |- + 你是一位资深的软件开发工程师和代码审查专家,拥有丰富的技术知识和最佳实践经验。你的任务是基于代码变更和相关技术文档进行全面的代码审查。 + + ### 代码审查目标与评分标准: + 1. 与最佳实践的符合度(35分): + - 完全符合文档中的最佳实践:30-35分 + - 大部分符合但有小问题:20-29分 + - 存在明显违背最佳实践的情况:10-19分 + - 严重违背最佳实践:0-9分 + + 2. 代码质量与安全性(35分): + - 完全符合文档中的安全规范:30-35分 + - 存在轻微安全隐患:20-29分 + - 存在明显安全风险:10-19分 + - 严重安全漏洞:0-9分 + + 3. 性能与可维护性(20分): + - 完全符合性能最佳实践:15-20分 + - 轻微性能问题:10-14分 + - 明显性能隐患:5-9分 + - 严重性能问题:0-4分 + + 4. 文档一致性(10分): + - 完全遵循文档规范:8-10分 + - 部分遵循文档规范:4-7分 + - 基本不符合文档规范:0-3分 + + ### 评分规则: + 1. 严格对照检索到的技术文档进行评分 + 2. 如果某项没有相关文档参考,该项按照一般标准评分 + 3. 对于违背文档明确规定的情况,必须在对应项目显著扣分 + 4. 发现严重安全漏洞或严重违背最佳实践时,总分不得超过60分 + + ### 审查策略: + 1. 优先参考提供的相关技术文档或编码规范 + 2. 明确指出代码与文档规范的匹配程度 + 3. 对于每个问题,都要引用相关文档作为依据 + 4. 如果发现代码与文档规范不符,需要: + - 引用具体的文档内容 + - 说明不符合的具体原因 + - 提供基于文档的改进建议 + + ### 输出格式: + 请以Markdown格式输出代码审查报告,包含: + 1. 文档匹配分析:列出代码与检索到的文档的匹配程度 + 2. 问题说明:每个问题都需要引用相关文档作为依据 + 3. 
改进建议:基于文档提供具体的改进方案 + 4. 评分明细:为每个评分标准提供具体分数 + 5. 总分:格式为“总分:XX分”(例如:总分:80分),确保可通过正则表达式 r"总分[::]\s*(\d+)分?") 解析出总分。 + + ### 特别说明: + 整个评论要保持{{ style }}风格 + {% if style == 'professional' %} + 评论时请使用标准的工程术语,结合技术文档保持专业严谨。 + {% elif style == 'sarcastic' %} + 评论时请大胆使用讽刺性语言,但要确保基于文档的技术指正准确。 + {% elif style == 'gentle' %} + 评论时请多用"根据最佳实践建议"、"文档中提到可以考虑"等温和措辞。 + {% elif style == 'humorous' %} + 评论时请在技术点评中加入适当幽默元素,合理使用Emoji: + - 📚 表示参考文档 + - 💡 表示最佳实践建议 + - 🐛 表示bug + - 💥 表示严重问题 + - 🎯 表示改进建议 + {% endif %} + + user_prompt: |- + 请基于代码变更和相关技术文档,以{{ style }}风格进行代码审查。 + + ## 代码变更内容: + {diffs_text} + + ## 提交历史(commits): + {commits_text} + + ## 相关技术文档和最佳实践: + {relevant_docs} + + 请严格按照检索到的技术文档中的规范和最佳实践,对代码变更进行全面审查。对于每个发现的问题,都需要引用相关文档作为依据。 From 938c2e2884c3f099c170d135020611df86d48ce4 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:32:08 +0800 Subject: [PATCH 18/27] Create test.txt --- docs/builtin/test.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/builtin/test.txt diff --git a/docs/builtin/test.txt b/docs/builtin/test.txt new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/docs/builtin/test.txt @@ -0,0 +1 @@ + From 22897276cd9a77312dad0b90dc8e683386577ce0 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:33:12 +0800 Subject: [PATCH 19/27] Create test.txt --- docs/examples/test.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/examples/test.txt diff --git a/docs/examples/test.txt b/docs/examples/test.txt new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/docs/examples/test.txt @@ -0,0 +1 @@ + From 58d8338343fc306734f3032ea1a4c3aca3d33f3b Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:34:02 +0800 Subject: [PATCH 20/27] =?UTF-8?q?=E5=8C=85=E5=90=AB=E4=BA=86=E5=90=84?= =?UTF-8?q?=E7=A7=8D=E7=BC=96=E7=A8=8B=E8=AF=AD=E8=A8=80=E7=9A=84=E7=A4=BA?= 
=?UTF-8?q?=E4=BE=8B=E4=BB=A3=E7=A0=81=EF=BC=8C=E7=94=A8=E4=BA=8ERAG?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E9=A1=B5=E9=9D=A2=E7=9A=84=E9=A2=84=E8=AE=BE?= =?UTF-8?q?=E7=A4=BA=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/examples/code_examples.py | 407 +++++++++++++++++++++++++++++++++ 1 file changed, 407 insertions(+) create mode 100644 docs/examples/code_examples.py diff --git a/docs/examples/code_examples.py b/docs/examples/code_examples.py new file mode 100644 index 000000000..120b3d8fc --- /dev/null +++ b/docs/examples/code_examples.py @@ -0,0 +1,407 @@ +""" +这个文件包含了各种编程语言的示例代码,用于RAG测试页面的预设示例。 +这些示例故意违反了编码规范,用于测试代码审查功能。 +""" + +# HTML示例:违反规范的导航栏 +html_example = ''' + + +Bad Navigation + + + + +
Content here
+ + +''' + +# CSS示例:违反BEM规范和响应式设计 +css_example = ''' +#nav123 .NAVBAR { + width: 1200px !important; + height: 80px; + background-color: red; + position: fixed; + top: 0px; + left: 50%; + margin-left: -600px; +} + +.NAVBAR div div div { + float: left; + margin-left: 50px; +} + +.NAVBAR div div div a { + color: white; + font-size: 16px; + text-decoration: none; +} + +.NAVBAR div:nth-child(2) { + float: right; + margin-right: 20px; + cursor: pointer; +} + +.NAVBAR div:nth-child(2) span { + display: block; + width: 25px; + height: 3px; + background: white; + margin: 5px 0; +} + +#NAV_LINKS { + position: absolute; + top: 80px; + left: 0; + width: 1200px; + background: red; + display: none; +} + +#NAV_LINKS.show { + display: block; +} + +@media screen and (max-width: 768px) { + /* 没有移动端适配 */ +} +''' + +# JavaScript示例:违反ES6+规范和最佳实践 +js_example = ''' +var navbar; +var navlinks; +var isopen = false; + +function init() { + navbar = document.getElementById('nav123'); + navlinks = document.getElementById('NAV_LINKS'); + + document.querySelector('.NAVBAR div:nth-child(2)').onclick = function() { + toggle(); + } + + window.onresize = function() { + resize(); + } +} + +function toggle() { + if (isopen == true) { + navlinks.className = ''; + isopen = false; + } else { + navlinks.className = 'show'; + isopen = true; + } +} + +function resize() { + if (window.innerWidth > 768) { + navlinks.className = ''; + isopen = false; + } +} + +window.onload = function() { + init(); +} +''' + +# Java示例:违反Java编码规范 +java_example = ''' +import java.util.*; + +public class todoservice { + public static Map TODOS = new HashMap<>(); + public static int ID = 1; + + public static Object createtodo(String title, String desc) { + Map todo = new HashMap<>(); + todo.put("id", ID++); + todo.put("title", title); + todo.put("desc", desc); + todo.put("status", "todo"); + todo.put("created", new Date().toString()); + + TODOS.put(String.valueOf(ID), todo); + return todo; + } + + public static Object 
gettodo(String id) { + return TODOS.get(id); + } + + public static List getalltodos() { + List result = new ArrayList<>(); + for(Map.Entry entry : TODOS.entrySet()) { + result.add(entry.getValue()); + } + return result; + } + + public static void updatestatus(String id, String status) { + Object todo = TODOS.get(id); + if(todo != null) { + ((Map)todo).put("status", status); + ((Map)todo).put("updated", new Date().toString()); + } + } + + public static void main(String[] args) { + createtodo("Test Task", "This is a test"); + System.out.println(getalltodos()); + } +} +''' + +# Python示例:违反PEP8和类型注解规范 +python_example = ''' +import uuid,datetime +from enum import Enum + +class taskstatus(Enum): + TODO="todo" + DOING="doing" + DONE="done" + +class task: + def __init__(self,id,title,desc,status,created,updated=None): + self.id=id + self.title=title + self.desc=desc + self.status=status + self.created=created + self.updated=updated + +class TaskManager: + def __init__(self): + self.tasks={} + + def create_task(self,title,desc): + id=str(uuid.uuid4()) + t=task(id,title,desc,taskstatus.TODO,datetime.datetime.now()) + self.tasks[id]=t + return t + + def get_task(self,id): + if id in self.tasks: + return self.tasks[id] + else: + return None + + def list_tasks(self): + result=[] + for k,v in self.tasks.items(): + result.append(v) + return result + + def update_task_status(self,id,status): + if id in self.tasks: + self.tasks[id].status=status + self.tasks[id].updated=datetime.datetime.now() + return self.tasks[id] + return None + +# 全局变量 +manager=TaskManager() + +def create(title,desc): + return manager.create_task(title,desc) + +def get(id): + return manager.get_task(id) +''' + +# C++示例:违反C++编码规范 +cpp_example = ''' +#include +#include +#include +#include +using namespace std; + +enum taskstatus { + TODO, + DOING, + DONE +}; + +class task { +public: + string id; + string title; + string desc; + taskstatus status; + + task(string i, string t, string d) { + id = i; + title = t; + 
desc = d; + status = TODO; + } +}; + +class TaskManager { +private: + map tasks; + int idcounter; + +public: + TaskManager() { + idcounter = 1; + } + + task* createTask(string title, string desc) { + string id = to_string(idcounter++); + task* t = new task(id, title, desc); + tasks[id] = t; + return t; + } + + task* getTask(string id) { + if(tasks.find(id) != tasks.end()) { + return tasks[id]; + } + return NULL; + } + + vector listTasks() { + vector result; + for(auto it = tasks.begin(); it != tasks.end(); it++) { + result.push_back(it->second); + } + return result; + } + + void updateStatus(string id, taskstatus status) { + task* t = getTask(id); + if(t != NULL) { + t->status = status; + } + } + + ~TaskManager() { + for(auto it = tasks.begin(); it != tasks.end(); it++) { + delete it->second; + } + } +}; + +TaskManager* manager = new TaskManager(); + +void createTask(string title, string desc) { + manager->createTask(title, desc); +} +''' + +# Go示例:违反Go编码规范和并发安全 +go_example = ''' +package taskmanager + +import ( + "strconv" + "time" +) + +type taskstatus string + +const ( + TODO taskstatus = "todo" + DOING taskstatus = "doing" + DONE taskstatus = "done" +) + +type task struct { + Id string + title string + description string + Status taskstatus + created_at time.Time + updated_at *time.Time +} + +type TaskManager struct { + tasks map[string]*task + id_counter int +} + +var manager *TaskManager + +func init() { + manager = &TaskManager{ + tasks: make(map[string]*task), + id_counter: 1, + } +} + +func (tm *TaskManager) createTask(title, description string) *task { + id := strconv.Itoa(tm.id_counter) + tm.id_counter++ + + t := &task{ + Id: id, + title: title, + description: description, + Status: TODO, + created_at: time.Now(), + } + + tm.tasks[id] = t + return t +} + +func (tm *TaskManager) GetTask(id string) *task { + if t, ok := tm.tasks[id]; ok { + return t + } + return nil +} + +func (tm *TaskManager) ListTasks() []*task { + var tasks []*task + for _, task := 
range tm.tasks { + tasks = append(tasks, task) + } + return tasks +} + +func (tm *TaskManager) updateTaskStatus(id string, status taskstatus) *task { + if task, exists := tm.tasks[id]; exists { + task.Status = status + now := time.Now() + task.updated_at = &now + return task + } + return nil +} + +func CreateTask(title, description string) *task { + return manager.createTask(title, description) +} + +func GetTask(id string) *task { + return manager.GetTask(id) +} +''' \ No newline at end of file From 1a6ef14f4c6ce1817bde8e940d92cfb3eb8cb28f Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:34:21 +0800 Subject: [PATCH 21/27] Delete docs/examples/test.txt --- docs/examples/test.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 docs/examples/test.txt diff --git a/docs/examples/test.txt b/docs/examples/test.txt deleted file mode 100644 index 8b1378917..000000000 --- a/docs/examples/test.txt +++ /dev/null @@ -1 +0,0 @@ - From d002c3a22cd11e002875bb573eb31d5723cfab19 Mon Sep 17 00:00:00 2001 From: conan <55241228+wengqian66@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:35:15 +0800 Subject: [PATCH 22/27] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=B8=8D=E5=90=8C?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B=E4=BB=A3=E7=A0=81=E5=86=85=E7=BD=AE=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E6=96=87=E6=A1=A3=E7=9B=AE=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/builtin/cpp_standards.md | 107 ++++++++++++++ docs/builtin/css_standards.md | 83 +++++++++++ docs/builtin/go_standards.md | 55 +++++++ docs/builtin/html_standards.md | 57 ++++++++ docs/builtin/java_standards.md | 156 ++++++++++++++++++++ docs/builtin/javascript_standards.md | 59 ++++++++ docs/builtin/python_standards.md | 211 +++++++++++++++++++++++++++ 7 files changed, 728 insertions(+) create mode 100644 docs/builtin/cpp_standards.md create mode 100644 docs/builtin/css_standards.md create mode 100644 
docs/builtin/go_standards.md create mode 100644 docs/builtin/html_standards.md create mode 100644 docs/builtin/java_standards.md create mode 100644 docs/builtin/javascript_standards.md create mode 100644 docs/builtin/python_standards.md diff --git a/docs/builtin/cpp_standards.md b/docs/builtin/cpp_standards.md new file mode 100644 index 000000000..ce92fecbc --- /dev/null +++ b/docs/builtin/cpp_standards.md @@ -0,0 +1,107 @@ +# C++编码规范 + +**C++代码规范 | CPP编程标准 | C++最佳实践 | C++代码审查** + +C++编程语言的编码规范和最佳实践指南,适用于代码审查和质量控制。 + +--- + +## 1. 命名规范 + +### 1.1 通用规则 +- 使用有意义的名称 +- 避免缩写(除非广泛使用) +- 保持一致性 + +### 1.2 具体规范 +**类名**:使用PascalCase,如TaskManager、ResourceHandler +**函数名**:使用camelCase,如processTask、isValid +**变量名**:使用camelCase,如itemCount、firstName +**常量**:使用UPPER_SNAKE_CASE,如MAX_ITEMS、PI +**命名空间**:使用小写字母,如utils、database + +## 2. 内存管理 + +### 2.1 智能指针 +**原则**:优先使用智能指针,避免裸指针 + +**unique_ptr**:管理独占资源,不能被拷贝 +**shared_ptr**:管理共享资源,支持引用计数 + +**最佳实践**: +- 使用std::make_unique和std::make_shared +- 避免使用裸指针 +- 注意避免循环引用 + +### 2.2 RAII原则 +**原则**:构造函数获取资源,析构函数释放资源 + +**移动语义**:使用移动构造函数和移动赋值运算符提高性能 + +## 3. 现代C++特性 + +### 3.1 auto关键字 +**规范**:在类型明显时使用auto,提高可读性 + +**最佳实践**: +- 避免过度使用auto +- 在lambda表达式和模板中使用 + +### 3.2 范围for循环 +**使用**:简化容器遍历,提高可读性 + +### 3.3 Lambda表达式 +**使用**:创建匿名函数,适用于函数对象场景 + +**规范**: +- 使用auto接收lambda表达式 +- 合理使用捕获列表 +- 避免捕获大对象 + +## 4. 异常处理 + +### 4.1 异常安全 +**保证**:函数提供异常安全保证,保持程序状态一致 + +**策略**: +- 使用RAII管理资源 +- 避免析构函数抛出异常 +- 使用智能指针避免泄漏 + +### 4.2 异常处理最佳实践 +**原则**: +- 只对异常情况使用异常处理 +- 提供有意义异常信息 +- 使用适当异常类型 +- 避免影响性能 + +## 5. 模板编程 + +### 5.1 函数模板 +**设计**:实现泛型编程,提高代码复用性 + +**最佳实践**: +- 使用概念约束模板参数 +- 避免过度复杂模板元编程 +- 提供清晰错误信息 + +### 5.2 类模板 +**设计**:提供类型安全的泛型容器和算法 + +## 6. 
并发编程 + +### 6.1 线程安全 +**设计**:确保共享数据访问线程安全 + +**策略**: +- 使用std::mutex保护共享数据 +- 使用std::atomic进行原子操作 +- 避免数据竞争和死锁 + +### 6.2 异步编程 +**模式**:使用std::future和std::async + +**最佳实践**: +- 合理使用异步操作 +- 避免过度使用线程 +- 使用线程池管理资源 \ No newline at end of file diff --git a/docs/builtin/css_standards.md b/docs/builtin/css_standards.md new file mode 100644 index 000000000..14eba6f9c --- /dev/null +++ b/docs/builtin/css_standards.md @@ -0,0 +1,83 @@ +# CSS编码规范 + +**CSS代码规范 | CSS编程标准 | CSS最佳实践 | CSS代码审查** + +CSS样式表语言的编码规范和最佳实践指南,适用于代码审查和质量控制。 + +--- + +## 1. 基本原则 + +- 使用BEM命名方法论 +- 保持代码简洁和可维护 +- 优先使用类选择器 +- 避免过度嵌套 +- 注重代码复用 + +## 2. 命名规范 + +### BEM命名方法论 +**原则**:块、元素、修饰符的组合创建清晰类名结构 + +**块(Block)**:独立组件,如header、menu、button +**元素(Element)**:块的一部分,如header__title、menu__item +**修饰符(Modifier)**:改变外观或行为,如button--primary、menu__item--active + +### 通用命名规则 +**格式规范**: +- 使用小写字母 +- 使用连字符(-)连接单词 +- 使用双下划线(__)表示元素关系 +- 使用双连字符(--)表示修饰符 + +## 3. 代码组织 + +**属性排序**:按逻辑顺序组织CSS属性 + +**属性分组**: +- 定位:position、top、right、bottom、left、z-index +- 盒模型:display、float、width、height、margin、padding、border +- 排版:font、line-height、text-align、word-wrap +- 视觉效果:background、color、opacity、box-shadow +- 其他:cursor、overflow、transition + +## 4. 响应式设计 + +**移动优先**:先为移动设备编写样式,再用媒体查询为大屏幕添加样式 +**相对单位**:优先使用rem、em、vw、vh而不是固定像素 +**断点设置**:设置合理断点,如768px(平板)、1024px(桌面) +**布局适配**:使用百分比、flexbox或grid布局 + +## 5. 性能优化 + +**文件优化**: +- 避免@import,使用link标签 +- 合并和压缩CSS文件 +- 使用CSS Sprites减少图片请求 +- 移除未使用的CSS代码 + +**选择器优化**: +- 避免复杂选择器 +- 优先使用类选择器 +- 避免通配符选择器 +- 减少嵌套层级 + +**动画优化**: +- 优先使用CSS3动画 +- 使用transform和opacity +- 避免影响页面布局 +- 使用will-change优化性能 + +## 6. 浏览器兼容性 + +**前缀处理**:使用Autoprefixer自动处理浏览器前缀 +**浏览器测试**:测试主流浏览器兼容性 +**优雅降级**:为不支持新特性的浏览器提供基础样式 +**样式重置**:使用normalize.css确保一致基础样式 + +## 7. 
CSS变量使用 + +**变量定义**:在:root中定义可复用值,如颜色、字体、间距 +**变量命名**:使用kebab-case,以--开头,如--primary-color +**变量使用**:使用var()函数引用变量 +**变量作用域**:遵循CSS级联规则,可重新定义 \ No newline at end of file diff --git a/docs/builtin/go_standards.md b/docs/builtin/go_standards.md new file mode 100644 index 000000000..5dd437727 --- /dev/null +++ b/docs/builtin/go_standards.md @@ -0,0 +1,55 @@ +# Go编码规范 + +**Go代码规范 | Go编程标准 | Go最佳实践 | Go代码审查** + +Go编程语言的编码规范和最佳实践指南,适用于代码审查和质量控制。 + +--- + +## 1. 基本原则 + +- 简洁性和可读性优先 +- 遵循Go的惯用语法 +- 使用gofmt格式化代码 +- 编写文档注释 +- 注重错误处理 + +## 2. 命名规范 + +**包名**:使用小写单词,如userservice、database +**接口名**:以er结尾,如Reader、Writer、Handler +**结构体和方法**:使用PascalCase,如UserManager、CreateUser +**常量**:使用PascalCase或全大写,如StatusActive、MAX_RETRIES + +## 3. 错误处理 + +**原则**:显式错误处理,每个可能出错的函数返回error类型 + +**策略**: +- 检查参数有效性 +- 使用fmt.Errorf包装错误 +- 使用errors.New创建简单错误 +- 使用自定义错误类型 + +## 4. 并发编程 + +### Goroutines +**原则**:轻量级线程,注意资源管理和同步 + +**最佳实践**: +- 避免创建过多Goroutine +- 使用sync.WaitGroup等待完成 +- 使用channel进行通信 +- 注意生命周期管理 + +### Channels +**规范**:Goroutine间通信的主要方式 + +**最佳实践**: +- 无缓冲channel用于同步通信 +- 有缓冲channel用于异步通信 +- 使用select处理多个channel +- 及时关闭不再使用的channel + +### 同步机制 +**互斥锁**:使用sync.Mutex保护共享资源,用defer确保释放 \ No newline at end of file diff --git a/docs/builtin/html_standards.md b/docs/builtin/html_standards.md new file mode 100644 index 000000000..729e9aac3 --- /dev/null +++ b/docs/builtin/html_standards.md @@ -0,0 +1,57 @@ +# HTML编码规范 + +**HTML代码规范 | HTML编程标准 | HTML最佳实践 | HTML代码审查** + +HTML标记语言的编码规范和最佳实践指南,适用于代码审查和质量控制。 + +--- + +## 1. 基本原则 + +- 使用HTML5文档类型:`<!DOCTYPE html>` +- 使用语义化标签:`
`, `