diff --git a/src/memos/api/product_api.py b/src/memos/api/product_api.py
index 681644a0..709ad74f 100644
--- a/src/memos/api/product_api.py
+++ b/src/memos/api/product_api.py
@@ -33,6 +33,6 @@
 
     parser = argparse.ArgumentParser()
     parser.add_argument("--port", type=int, default=8001)
-    parser.add_argument("--workers", type=int, default=32)
+    parser.add_argument("--workers", type=int, default=1)
     args = parser.parse_args()
     uvicorn.run("memos.api.product_api:app", host="0.0.0.0", port=args.port, workers=args.workers)
diff --git a/src/memos/llms/openai.py b/src/memos/llms/openai.py
index 698bc326..47818a94 100644
--- a/src/memos/llms/openai.py
+++ b/src/memos/llms/openai.py
@@ -11,6 +11,7 @@
 from memos.llms.utils import remove_thinking_tags
 from memos.log import get_logger
 from memos.types import MessageList
+from memos.utils import timed
 
 
 logger = get_logger(__name__)
@@ -56,6 +57,7 @@ def clear_cache(cls):
         cls._instances.clear()
         logger.info("OpenAI LLM instance cache cleared")
 
+    @timed
     def generate(self, messages: MessageList) -> str:
         """Generate a response from OpenAI LLM."""
         response = self.client.chat.completions.create(
@@ -73,6 +75,7 @@ def generate(self, messages: MessageList) -> str:
         else:
             return response_content
 
+    @timed
     def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
         """Stream response from OpenAI LLM with optional reasoning support."""
         response = self.client.chat.completions.create(
diff --git a/src/memos/llms/qwen.py b/src/memos/llms/qwen.py
index a47fcdf3..b2d662ff 100644
--- a/src/memos/llms/qwen.py
+++ b/src/memos/llms/qwen.py
@@ -5,6 +5,7 @@
 from memos.llms.utils import remove_thinking_tags
 from memos.log import get_logger
 from memos.types import MessageList
+from memos.utils import timed
 
 
 logger = get_logger(__name__)
@@ -16,6 +17,7 @@ class QwenLLM(OpenAILLM):
     def __init__(self, config: QwenLLMConfig):
         super().__init__(config)
 
+    @timed
     def generate(self, messages: MessageList) -> str:
         """Generate a response from Qwen LLM."""
         response = self.client.chat.completions.create(
@@ -33,6 +35,7 @@ def generate(self, messages: MessageList) -> str:
         else:
             return response_content
 
+    @timed
     def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
         """Stream response from Qwen LLM."""
         response = self.client.chat.completions.create(
diff --git a/src/memos/mem_os/product.py b/src/memos/mem_os/product.py
index a4ab4ef2..d6464389 100644
--- a/src/memos/mem_os/product.py
+++ b/src/memos/mem_os/product.py
@@ -179,14 +179,14 @@ def _restore_user_instances(
         """
         try:
             # Get all user configurations from persistent storage
-            user_configs = self.user_manager.list_user_configs()
+            user_configs = self.user_manager.list_user_configs(self.max_user_instances)
 
             # Get the raw database records for sorting by updated_at
             session = self.user_manager._get_session()
             try:
                 from memos.mem_user.persistent_user_manager import UserConfig
 
-                db_configs = session.query(UserConfig).all()
+                db_configs = session.query(UserConfig).limit(self.max_user_instances).all()
 
                 # Create a mapping of user_id to updated_at timestamp
                 updated_at_map = {config.user_id: config.updated_at for config in db_configs}
diff --git a/src/memos/mem_user/mysql_persistent_user_manager.py b/src/memos/mem_user/mysql_persistent_user_manager.py
index f8983c87..99e49d20 100644
--- a/src/memos/mem_user/mysql_persistent_user_manager.py
+++ b/src/memos/mem_user/mysql_persistent_user_manager.py
@@ -188,7 +188,7 @@ def delete_user_config(self, user_id: str) -> bool:
         finally:
             session.close()
 
-    def list_user_configs(self) -> dict[str, MOSConfig]:
+    def list_user_configs(self, limit: int = 1) -> dict[str, MOSConfig]:
         """List all user configurations.
 
         Returns:
@@ -196,7 +196,7 @@ def list_user_configs(self) -> dict[str, MOSConfig]:
         """
         session = self._get_session()
         try:
-            user_configs = session.query(UserConfig).all()
+            user_configs = session.query(UserConfig).limit(limit).all()
 
             result = {}
             for user_config in user_configs: