diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json index 5d867fe64..80b1b230d 100644 --- a/frontend/public/locales/en/common.json +++ b/frontend/public/locales/en/common.json @@ -854,7 +854,7 @@ "mcpConfig.modal.updatingTools": "Updating tools list...", "mcpConfig.addServer.title": "Add MCP Server", "mcpConfig.addServer.namePlaceholder": "Server name", - "mcpConfig.addServer.urlPlaceholder": "Server URL (e.g.: http://localhost:3001/sse), currently only SSE protocol supported", + "mcpConfig.addServer.urlPlaceholder": "Server URL (e.g.: http://localhost:3001/mcp), currently supports sse and streamable-http protocols", "mcpConfig.addServer.button.add": "Add", "mcpConfig.addServer.button.updating": "Updating...", "mcpConfig.serverList.title": "Configured MCP Servers", diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json index dded1efb0..77a1b2c2e 100644 --- a/frontend/public/locales/zh/common.json +++ b/frontend/public/locales/zh/common.json @@ -854,7 +854,7 @@ "mcpConfig.modal.updatingTools": "正在更新工具列表...", "mcpConfig.addServer.title": "添加MCP服务器", "mcpConfig.addServer.namePlaceholder": "服务器名称", - "mcpConfig.addServer.urlPlaceholder": "服务器URL (如: http://localhost:3001/sse),目前仅支持sse协议", + "mcpConfig.addServer.urlPlaceholder": "服务器URL (如: http://localhost:3001/mcp),目前支持sse和streamable-http协议", "mcpConfig.addServer.button.add": "添加", "mcpConfig.addServer.button.updating": "更新中...", "mcpConfig.serverList.title": "已配置的MCP服务器", diff --git a/sdk/nexent/core/agents/agent_model.py b/sdk/nexent/core/agents/agent_model.py index 6eff00718..f3c5a77b7 100644 --- a/sdk/nexent/core/agents/agent_model.py +++ b/sdk/nexent/core/agents/agent_model.py @@ -1,7 +1,7 @@ from __future__ import annotations from threading import Event -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, Field @@ -50,7 +50,12 @@ class AgentRunInfo(BaseModel): model_config_list: List[ModelConfig] = Field(description="List of model configurations") observer: MessageObserver = Field(description="Return data") agent_config: AgentConfig = Field(description="Detailed Agent configuration") - mcp_host: Optional[List[str]] = Field(description="MCP server address", default=None) + mcp_host: Optional[List[Union[str, Dict[str, Any]]]] = Field( + description="MCP server address(es). Can be a string (URL) or dict with 'url' and 'transport' keys. " + "Transport can be 'sse' or 'streamable-http'. 
If string, transport is auto-detected based on URL ending: " + "URLs ending with '/sse' use 'sse' transport, URLs ending with '/mcp' use 'streamable-http' transport.", + default=None + ) history: Optional[List[AgentHistory]] = Field(description="Historical conversation information", default=None) stop_event: Event = Field(description="Stop event control") diff --git a/sdk/nexent/core/agents/core_agent.py b/sdk/nexent/core/agents/core_agent.py index 826ef7093..be7b83b5e 100644 --- a/sdk/nexent/core/agents/core_agent.py +++ b/sdk/nexent/core/agents/core_agent.py @@ -1,3 +1,4 @@ +import json import re import ast import time @@ -9,12 +10,13 @@ from rich.console import Group from rich.text import Text -from smolagents.agents import CodeAgent, handle_agent_output_types, AgentError +from smolagents.agents import CodeAgent, handle_agent_output_types, AgentError, ActionOutput, RunResult from smolagents.local_python_executor import fix_final_answer_code from smolagents.memory import ActionStep, PlanningStep, FinalAnswerStep, ToolCall, TaskStep, SystemPromptStep -from smolagents.models import ChatMessage -from smolagents.monitoring import LogLevel -from smolagents.utils import AgentExecutionError, AgentGenerationError, truncate_content +from smolagents.models import ChatMessage, CODEAGENT_RESPONSE_FORMAT +from smolagents.monitoring import LogLevel, Timing, YELLOW_HEX, TokenUsage +from smolagents.utils import AgentExecutionError, AgentGenerationError, truncate_content, AgentMaxStepsError, \ + extract_code_from_text from ..utils.observer import MessageObserver, ProcessType from jinja2 import Template, StrictUndefined @@ -125,13 +127,17 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: # Add new step in logs memory_step.model_input_messages = input_messages + stop_sequences = ["", "Observation:", "Calling tools:", "", "Observation:", "Calling tools:", " Generator[Any]: # Parse try: - code_action = fix_final_answer_code(parse_code_blobs(model_output)) + if self._use_structured_outputs_internally: + code_action = json.loads(model_output)["code"] + code_action = extract_code_from_text(code_action, self.code_block_tags) or code_action + else: + code_action = parse_code_blobs(model_output) + code_action = fix_final_answer_code(code_action) + memory_step.code_action = code_action # Record parsing results self.observer.add_message( self.agent_name, ProcessType.PARSE, code_action) @@ -155,26 +167,29 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: content=model_output, title="AGENT FINAL ANSWER", level=LogLevel.INFO) raise FinalAnswerError() - memory_step.tool_calls = [ - ToolCall(name="python_interpreter", arguments=code_action, id=f"call_{len(self.memory.steps)}", )] + tool_call = ToolCall( + name="python_interpreter", + arguments=code_action, + id=f"call_{len(self.memory.steps)}", + ) + memory_step.tool_calls = [tool_call] # Execute self.logger.log_code(title="Executing parsed code:", content=code_action, level=LogLevel.INFO) - is_final_answer = False try: - output, execution_logs, is_final_answer = self.python_executor( - code_action) - + code_output = self.python_executor(code_action) execution_outputs_console = [] - if len(execution_logs) > 0: + if len(code_output.logs) > 0: # Record execution results self.observer.add_message( - self.agent_name, ProcessType.EXECUTION_LOGS, f"{execution_logs}") + self.agent_name, ProcessType.EXECUTION_LOGS, f"{code_output.logs}") execution_outputs_console += [ - Text("Execution logs:", style="bold"), Text(execution_logs), ] - 
observation = "Execution logs:\n" + execution_logs + Text("Execution logs:", style="bold"), + Text(code_output.logs), + ] + observation = "Execution logs:\n" + code_output.logs except Exception as e: if hasattr(self.python_executor, "state") and "_print_outputs" in self.python_executor.state: execution_logs = str( @@ -196,20 +211,24 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: level=LogLevel.INFO, ) raise AgentExecutionError(error_msg, self.logger) - truncated_output = truncate_content(str(output)) - if output is not None: + truncated_output = None + if code_output is not None and code_output.output is not None: + truncated_output = truncate_content(str(code_output.output)) observation += "Last output from code snippet:\n" + truncated_output memory_step.observations = observation - execution_outputs_console += [ - Text(f"{('Out - Final answer' if is_final_answer else 'Out')}: {truncated_output}", - style=("bold #d4b702" if is_final_answer else ""), ), ] + if not code_output.is_final_answer and truncated_output is not None: + execution_outputs_console += [ + Text( + f"Out: {truncated_output}", + ), + ] self.logger.log(Group(*execution_outputs_console), level=LogLevel.INFO) - memory_step.action_output = output - yield output if is_final_answer else None + memory_step.action_output = code_output.output + yield ActionOutput(output=code_output.output, is_final_answer=code_output.is_final_answer) def run(self, task: str, stream: bool = False, reset: bool = True, images: Optional[List[str]] = None, - additional_args: Optional[Dict] = None, max_steps: Optional[int] = None, ): + additional_args: Optional[Dict] = None, max_steps: Optional[int] = None, return_full_result: bool | None = None): """ Run the agent for the given task. @@ -220,6 +239,8 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio images (`list[str]`, *optional*): Paths to image(s). additional_args (`dict`, *optional*): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names! max_steps (`int`, *optional*): Maximum number of steps the agent can take to solve the task. if not provided, will use the agent's default value. + return_full_result (`bool`, *optional*): Whether to return the full [`RunResult`] object or just the final answer output. + If `None` (default), the agent's `self.return_full_result` setting is used. Example: ```py @@ -236,7 +257,6 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio You have been provided with these additional arguments, that you can access using the keys as variables in your python code: {str(additional_args)}.""" - self.system_prompt = self.initialize_system_prompt() self.memory.system_prompt = SystemPromptStep( system_prompt=self.system_prompt) if reset: @@ -261,8 +281,47 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio if stream: # The steps are returned as they are executed through a generator to iterate on. return self._run_stream(task=self.task, max_steps=max_steps, images=images) + run_start_time = time.time() + steps = list(self._run_stream(task=self.task, max_steps=max_steps, images=images)) + # Outputs are returned only at the end. We only look at the last step. 
- return list(self._run_stream(task=self.task, max_steps=max_steps, images=images))[-1].final_answer + assert isinstance(steps[-1], FinalAnswerStep) + output = steps[-1].output + + return_full_result = return_full_result if return_full_result is not None else self.return_full_result + if return_full_result: + total_input_tokens = 0 + total_output_tokens = 0 + correct_token_usage = True + for step in self.memory.steps: + if isinstance(step, (ActionStep, PlanningStep)): + if step.token_usage is None: + correct_token_usage = False + break + else: + total_input_tokens += step.token_usage.input_tokens + total_output_tokens += step.token_usage.output_tokens + if correct_token_usage: + token_usage = TokenUsage(input_tokens=total_input_tokens, output_tokens=total_output_tokens) + else: + token_usage = None + + if self.memory.steps and isinstance(getattr(self.memory.steps[-1], "error", None), AgentMaxStepsError): + state = "max_steps_error" + else: + state = "success" + + step_dicts = self.memory.get_full_steps() + + return RunResult( + output=output, + token_usage=token_usage, + steps=step_dicts, + timing=Timing(start_time=run_start_time, end_time=time.time()), + state=state, + ) + + return output def __call__(self, task: str, **kwargs): """Adds additional prompting for the managed agent, runs it, and wraps the output. @@ -271,7 +330,11 @@ def __call__(self, task: str, **kwargs): full_task = Template(self.prompt_templates["managed_agent"]["task"], undefined=StrictUndefined).render({ "name": self.name, "task": task, **self.state }) - report = self.run(full_task, **kwargs) + result = self.run(full_task, **kwargs) + if isinstance(result, RunResult): + report = result.output + else: + report = result # When a sub-agent finishes running, return a marker try: @@ -286,7 +349,7 @@ def __call__(self, task: str, **kwargs): if self.provide_run_summary: answer += "\n\nFor more detail, find below a summary of this agent's work:\n\n" for message in self.write_memory_to_messages(summary_mode=True): - content = message["content"] + content = message.content answer += "\n" + truncate_content(str(content)) + "\n---" answer += "\n" return answer @@ -295,28 +358,44 @@ def _run_stream( self, task: str, max_steps: int, images: list["PIL.Image.Image"] | None = None ) -> Generator[ActionStep | PlanningStep | FinalAnswerStep]: final_answer = None + action_step = None self.step_number = 1 - while final_answer is None and self.step_number <= max_steps and not self.stop_event.is_set(): + returned_final_answer = False + while not returned_final_answer and self.step_number <= max_steps and not self.stop_event.is_set(): step_start_time = time.time() action_step = ActionStep( - step_number=self.step_number, start_time=step_start_time, observations_images=images + step_number=self.step_number, timing=Timing(start_time=step_start_time), observations_images=images ) try: - for el in self._execute_step(action_step): - yield el - final_answer = el + for output in self._step_stream(action_step): + yield output + + if isinstance(output, ActionOutput) and output.is_final_answer: + final_answer = output.output + self.logger.log( + Text(f"Final answer: {final_answer}", style=f"bold {YELLOW_HEX}"), + level=LogLevel.INFO, + ) + + if self.final_answer_checks: + self._validate_final_answer(final_answer) + returned_final_answer = True + action_step.is_final_answer = True + except FinalAnswerError: # When the model does not output code, directly treat the large model content as the final answer final_answer = action_step.model_output if 
isinstance(final_answer, str): final_answer = convert_code_format(final_answer) + returned_final_answer = True + action_step.is_final_answer = True except AgentError as e: action_step.error = e finally: - self._finalize_step(action_step, step_start_time) + self._finalize_step(action_step) self.memory.steps.append(action_step) yield action_step self.step_number += 1 @@ -324,8 +403,7 @@ def _run_stream( if self.stop_event.is_set(): final_answer = "" - if final_answer is None and self.step_number == max_steps + 1: - final_answer = self._handle_max_steps_reached( - task, images, step_start_time) + if not returned_final_answer and self.step_number == max_steps + 1: + final_answer = self._handle_max_steps_reached(task) yield action_step yield FinalAnswerStep(handle_agent_output_types(final_answer)) diff --git a/sdk/nexent/core/agents/nexent_agent.py b/sdk/nexent/core/agents/nexent_agent.py index f0f932389..d109e3428 100644 --- a/sdk/nexent/core/agents/nexent_agent.py +++ b/sdk/nexent/core/agents/nexent_agent.py @@ -1,8 +1,9 @@ import re +import time from threading import Event from typing import List -from smolagents import ActionStep, AgentText, TaskStep +from smolagents import ActionStep, AgentText, TaskStep, Timing from smolagents.tools import Tool from ..models.openai_llm import OpenAIModel @@ -195,7 +196,9 @@ def add_history_to_agent(self, history: List[AgentHistory]): # Create task step for user message self.agent.memory.steps.append(TaskStep(task=msg.content)) elif msg.role == 'assistant': - self.agent.memory.steps.append(ActionStep(action_output=msg.content, model_output=msg.content)) + self.agent.memory.steps.append(ActionStep(step_number=len(self.agent.memory.steps) + 1, + timing=Timing(start_time=time.time()), + action_output=msg.content, model_output=msg.content)) def agent_run_with_observer(self, query: str, reset=True): if not isinstance(self.agent, CoreAgent): @@ -214,7 +217,7 @@ def agent_run_with_observer(self, query: str, reset=True): if hasattr(step_log, "error") and step_log.error is not None: observer.add_message("", ProcessType.ERROR, str(step_log.error)) - final_answer = step_log.final_answer # Last log is the run's final_answer + final_answer = step_log.output # Last log is the run's final_answer if isinstance(final_answer, AgentText): final_answer_str = convert_code_format(final_answer.to_string()) diff --git a/sdk/nexent/core/agents/run_agent.py b/sdk/nexent/core/agents/run_agent.py index 41429367a..8a5a67517 100644 --- a/sdk/nexent/core/agents/run_agent.py +++ b/sdk/nexent/core/agents/run_agent.py @@ -1,6 +1,7 @@ import asyncio import logging from threading import Thread +from typing import Any, Dict, Union from smolagents import ToolCollection @@ -13,6 +14,56 @@ monitoring_manager = get_monitoring_manager() +def _detect_transport(url: str) -> str: + """ + Auto-detect MCP transport type based on URL format. + + Args: + url: MCP server URL + + Returns: + Transport type: 'sse' or 'streamable-http' + """ + url_stripped = url.strip() + + # Check URL ending to determine transport type + if url_stripped.endswith("/sse"): + return "sse" + elif url_stripped.endswith("/mcp"): + return "streamable-http" + + # Default to streamable-http for unrecognized formats + return "streamable-http" + + +def _normalize_mcp_config(mcp_host_item: Union[str, Dict[str, Any]]) -> Dict[str, Any]: + """ + Normalize MCP host configuration to a dictionary format. 
+ + Args: + mcp_host_item: Either a string URL or a dict with 'url' and optional 'transport' + + Returns: + Dictionary with 'url' and 'transport' keys + """ + if isinstance(mcp_host_item, str): + url = mcp_host_item + transport = _detect_transport(url) + return {"url": url, "transport": transport} + elif isinstance(mcp_host_item, dict): + url = mcp_host_item.get("url") + if not url: + raise ValueError("MCP host dict must contain 'url' key") + transport = mcp_host_item.get("transport") + if not transport: + transport = _detect_transport(url) + if transport not in ("sse", "streamable-http"): + raise ValueError(f"Invalid transport type: {transport}. Must be 'sse' or 'streamable-http'") + return {"url": url, "transport": transport} + else: + raise ValueError(f"Invalid MCP host item type: {type(mcp_host_item)}. Must be str or dict") + + @monitoring_manager.monitor_endpoint("agent_run_thread", "agent_run_thread") def agent_run_thread(agent_run_info: AgentRunInfo): try: @@ -31,7 +82,8 @@ def agent_run_thread(agent_run_info: AgentRunInfo): else: agent_run_info.observer.add_message( "", ProcessType.AGENT_NEW_RUN, "") - mcp_client_list = [{"url": mcp_url} for mcp_url in mcp_host] + # Normalize MCP host configurations to support both string and dict formats + mcp_client_list = [_normalize_mcp_config(item) for item in mcp_host] with ToolCollection.from_mcp(mcp_client_list, trust_remote_code=True) as tool_collection: nexent = NexentAgent( diff --git a/sdk/nexent/core/models/openai_llm.py b/sdk/nexent/core/models/openai_llm.py index 1a52e2d29..e4cfbc990 100644 --- a/sdk/nexent/core/models/openai_llm.py +++ b/sdk/nexent/core/models/openai_llm.py @@ -46,7 +46,7 @@ def __init__(self, observer: MessageObserver, temperature=0.2, top_p=0.95, @get_monitoring_manager().monitor_llm_call("openai_chat", "chat_completion") def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, tools_to_call_from: Optional[List[Tool]] = None, **kwargs, ) -> ChatMessage: + response_format: dict[str, str] | None = None, tools_to_call_from: Optional[List[Tool]] = None, **kwargs, ) -> ChatMessage: # Get token tracker from decorator (if monitoring is available) token_tracker = kwargs.pop('_token_tracker', None) @@ -63,7 +63,7 @@ def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List completion_kwargs = self._prepare_completion_kwargs( messages=messages, stop_sequences=stop_sequences, - grammar=grammar, tools_to_call_from=tools_to_call_from, model=self.model_id, + response_format=response_format, tools_to_call_from=tools_to_call_from, model=self.model_id, custom_role_conversions=self.custom_role_conversions, convert_images_to_image_urls=True, temperature=self.temperature, top_p=self.top_p, **kwargs, ) diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 453857a1d..1e1369fb7 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -31,14 +31,14 @@ dependencies = [ "rich>=13.9.4", "setuptools>=75.1.0", "websockets>=14.2", - "smolagents[mcp]==1.15.0", + "smolagents[mcp]==1.23.0", "Pillow>=10.0.0", "aiohttp>=3.1.13", "jieba>=0.42.1", "boto3>=1.37.34", "botocore>=1.37.34", "python-multipart>=0.0.20", - "mcpadapt==0.1.9", + "mcpadapt>=0.1.13", "mcp==1.10.1", "tiktoken>=0.5.0", "tavily-python", diff --git a/test/sdk/core/agents/test_core_agent.py b/test/sdk/core/agents/test_core_agent.py index cb6240893..54b725620 100644 --- a/test/sdk/core/agents/test_core_agent.py +++ b/test/sdk/core/agents/test_core_agent.py @@ -1,3 +1,5 @@ +import json 
+ import pytest from unittest.mock import MagicMock, patch from threading import Event @@ -14,22 +16,98 @@ def __init__(self, message): super().__init__(message) +class MockAgentMaxStepsError(Exception): + pass + + # Mock for smolagents and its sub-modules mock_smolagents = MagicMock() -mock_smolagents.ActionStep = MagicMock() -mock_smolagents.TaskStep = MagicMock() -mock_smolagents.SystemPromptStep = MagicMock() mock_smolagents.AgentError = MockAgentError mock_smolagents.handle_agent_output_types = MagicMock( return_value="handled_output") +mock_smolagents.utils.AgentMaxStepsError = MockAgentMaxStepsError + +# Create proper class types for isinstance checks (not MagicMock) +class MockActionStep: + def __init__(self, *args, **kwargs): + self.step_number = kwargs.get('step_number', 1) + self.timing = kwargs.get('timing', None) + self.observations_images = kwargs.get('observations_images', None) + self.model_input_messages = None + self.model_output_message = None + self.model_output = None + self.token_usage = None + self.code_action = None + self.tool_calls = None + self.observations = None + self.action_output = None + self.is_final_answer = False + self.error = None + +class MockTaskStep: + def __init__(self, *args, **kwargs): + self.task = kwargs.get('task', '') + self.task_images = kwargs.get('task_images', None) + +class MockSystemPromptStep: + def __init__(self, *args, **kwargs): + self.system_prompt = kwargs.get('system_prompt', '') + +class MockFinalAnswerStep: + def __init__(self, *args, **kwargs): + # Handle both positional and keyword arguments + if args: + self.output = args[0] + else: + self.output = kwargs.get('output', '') + +class MockPlanningStep: + def __init__(self, *args, **kwargs): + self.token_usage = kwargs.get('token_usage', None) + +class MockActionOutput: + def __init__(self, *args, **kwargs): + self.output = kwargs.get('output', None) + self.is_final_answer = kwargs.get('is_final_answer', False) + +class MockRunResult: + def __init__(self, *args, **kwargs): + self.output = kwargs.get('output', None) + self.token_usage = kwargs.get('token_usage', None) + self.steps = kwargs.get('steps', []) + self.timing = kwargs.get('timing', None) + self.state = kwargs.get('state', 'success') + +class MockCodeOutput: + """Mock object returned by python_executor.""" + def __init__(self, output=None, logs="", is_final_answer=False): + self.output = output + self.logs = logs + self.is_final_answer = is_final_answer + +# Assign proper classes to mock_smolagents +mock_smolagents.ActionStep = MockActionStep +mock_smolagents.TaskStep = MockTaskStep +mock_smolagents.SystemPromptStep = MockSystemPromptStep # Create dummy smolagents sub-modules for sub_mod in ["agents", "memory", "models", "monitoring", "utils", "local_python_executor"]: mock_module = MagicMock() setattr(mock_smolagents, sub_mod, mock_module) +# Assign classes to memory submodule +mock_smolagents.memory.ActionStep = MockActionStep +mock_smolagents.memory.TaskStep = MockTaskStep +mock_smolagents.memory.SystemPromptStep = MockSystemPromptStep +mock_smolagents.memory.FinalAnswerStep = MockFinalAnswerStep +mock_smolagents.memory.PlanningStep = MockPlanningStep +mock_smolagents.memory.ToolCall = MagicMock + +# Assign classes to agents submodule mock_smolagents.agents.CodeAgent = MagicMock +mock_smolagents.agents.ActionOutput = MockActionOutput +mock_smolagents.agents.RunResult = MockRunResult # Provide actual implementations for commonly used utils functions @@ -72,6 +150,23 @@ def mock_truncate_content(content, 
max_length=1000): core_agent_module = sys.modules['sdk.nexent.core.agents.core_agent'] # Override AgentError inside the imported module to ensure it has message attr core_agent_module.AgentError = MockAgentError + core_agent_module.AgentMaxStepsError = MockAgentMaxStepsError + # Override classes to use our mock classes for isinstance checks + core_agent_module.FinalAnswerStep = MockFinalAnswerStep + core_agent_module.ActionStep = MockActionStep + core_agent_module.PlanningStep = MockPlanningStep + core_agent_module.ActionOutput = MockActionOutput + core_agent_module.RunResult = MockRunResult + # Override CodeAgent to be a proper class that can be inherited + class MockCodeAgent: + def __init__(self, prompt_templates=None, *args, **kwargs): + # Accept any arguments but don't require observer + # Store attributes that might be accessed + self.prompt_templates = prompt_templates + # Initialize common attributes that CodeAgent might have + for key, value in kwargs.items(): + setattr(self, key, value) + core_agent_module.CodeAgent = MockCodeAgent CoreAgent = ImportedCoreAgent @@ -103,16 +198,50 @@ def core_agent_instance(mock_observer): agent.stop_event = Event() agent.memory = MagicMock() agent.memory.steps = [] + agent.memory.get_full_steps = MagicMock(return_value=[]) agent.python_executor = MagicMock() + + # Mock logger with all required methods + agent.logger = MagicMock() + agent.logger.log = MagicMock() + agent.logger.log_task = MagicMock() + agent.logger.log_markdown = MagicMock() + agent.logger.log_code = MagicMock() agent.step_number = 1 agent._execute_step = MagicMock() agent._finalize_step = MagicMock() agent._handle_max_steps_reached = MagicMock() + + # Set default attributes that might be needed + agent.max_steps = 5 + agent.state = {} + agent.system_prompt = "test system prompt" + agent.return_full_result = False + agent.provide_run_summary = False + agent.tools = {} + agent.managed_agents = {} + agent.monitor = MagicMock() + agent.monitor.reset = MagicMock() + agent.model = MagicMock() + if hasattr(agent.model, 'model_id'): + agent.model.model_id = "test-model" + agent.code_block_tags = ["```", "```"] + agent._use_structured_outputs_internally = False + agent.final_answer_checks = None # Set to avoid MagicMock creating new CoreAgent instances return agent +@pytest.fixture(autouse=True) +def reset_token_usage_mock(): + """Ensure TokenUsage mock does not leak state between tests.""" + token_usage = getattr(core_agent_module, "TokenUsage", None) + if hasattr(token_usage, "reset_mock"): + token_usage.reset_mock() + yield + + # ---------------------------------------------------------------------------- # Tests for _run method # ---------------------------------------------------------------------------- @@ -123,11 +252,12 @@ def test_run_normal_execution(core_agent_instance): task = "test task" max_steps = 3 - # Mock _execute_step to return a generator that yields final answer - def mock_execute_generator(action_step): - yield "final_answer" + # Mock _step_stream to return a generator that yields ActionOutput with final answer + def mock_step_stream(action_step): + action_output = MockActionOutput(output="final_answer", is_final_answer=True) + yield action_output - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_generator) as mock_execute_step, \ + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream) as mock_step_stream_patch, \ patch.object(core_agent_instance, '_finalize_step') as mock_finalize_step: 
core_agent_instance.step_number = 1 @@ -135,11 +265,11 @@ def mock_execute_generator(action_step): result = list(core_agent_instance._run_stream(task, max_steps)) # Assertions - # _run_stream yields: generator output + action step + final answer step + # _run_stream yields: ActionOutput from _step_stream + action step + final answer step assert len(result) == 3 - assert result[0] == "final_answer" # Generator output - assert isinstance(result[1], MagicMock) # Action step - assert isinstance(result[2], MagicMock) # Final answer step + assert isinstance(result[0], MockActionOutput) # ActionOutput from _step_stream + assert isinstance(result[1], MockActionStep) # Action step + assert isinstance(result[2], MockFinalAnswerStep) # Final answer step def test_run_with_max_steps_reached(core_agent_instance): @@ -148,11 +278,12 @@ def test_run_with_max_steps_reached(core_agent_instance): task = "test task" max_steps = 2 - # Mock _execute_step to return None (no final answer) - def mock_execute_generator(action_step): - yield None + # Mock _step_stream to return ActionOutput without final answer + def mock_step_stream(action_step): + action_output = MockActionOutput(output=None, is_final_answer=False) + yield action_output - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_generator) as mock_execute_step, \ + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream) as mock_step_stream_patch, \ patch.object(core_agent_instance, '_finalize_step') as mock_finalize_step, \ patch.object(core_agent_instance, '_handle_max_steps_reached', return_value="max_steps_reached") as mock_handle_max: @@ -162,18 +293,19 @@ def mock_execute_generator(action_step): result = list(core_agent_instance._run_stream(task, max_steps)) # Assertions - # For 2 steps: (None + action_step) * 2 + final_action_step + final_answer_step = 6 - assert len(result) == 6 - assert result[0] is None # First generator output - assert isinstance(result[1], MagicMock) # First action step - assert result[2] is None # Second generator output - assert isinstance(result[3], MagicMock) # Second action step - # Final action step (from _handle_max_steps_reached) - assert isinstance(result[4], MagicMock) - assert isinstance(result[5], MagicMock) # Final answer step + # For 2 steps: (ActionOutput + action_step) * 2 + final_action_step + final_answer_step = 6 + assert len(result) >= 5 + # First step: ActionOutput + ActionStep + assert isinstance(result[0], MockActionOutput) # First ActionOutput + assert isinstance(result[1], MockActionStep) # First action step + # Second step: ActionOutput + ActionStep + assert isinstance(result[2], MockActionOutput) # Second ActionOutput + assert isinstance(result[3], MockActionStep) # Second action step + # Last should be final answer step + assert isinstance(result[-1], MockFinalAnswerStep) # Final answer step # Verify method calls - assert mock_execute_step.call_count == 2 + assert mock_step_stream_patch.call_count == 2 mock_handle_max.assert_called_once() assert mock_finalize_step.call_count == 2 @@ -184,23 +316,28 @@ def test_run_with_stop_event(core_agent_instance): task = "test task" max_steps = 3 - def mock_execute_generator(action_step): + def mock_step_stream(action_step): core_agent_instance.stop_event.set() - yield None + action_output = MockActionOutput(output=None, is_final_answer=False) + yield action_output + + # Mock handle_agent_output_types to return the input value (identity function) + # This way when final_answer = "", it will be passed 
through + with patch.object(core_agent_module, 'handle_agent_output_types', side_effect=lambda x: x): + # Mock _step_stream to set stop event + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream): + with patch.object(core_agent_instance, '_finalize_step'): + # Execute + result = list(core_agent_instance._run_stream(task, max_steps)) - # Mock _execute_step to set stop event - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_generator): - with patch.object(core_agent_instance, '_finalize_step'): - # Execute - result = list(core_agent_instance._run_stream(task, max_steps)) - - # Assertions - # Should yield: generator output + action step + final answer step - assert len(result) == 3 - assert result[0] is None # Generator output - assert isinstance(result[1], MagicMock) # Action step - # Final answer step with "" - assert isinstance(result[2], MagicMock) + # Assertions + # Should yield: ActionOutput from _step_stream + action step + final answer step + assert len(result) == 3 + assert isinstance(result[0], MockActionOutput) # ActionOutput from _step_stream + assert isinstance(result[1], MockActionStep) # Action step + # Final answer step with "" + assert isinstance(result[2], MockFinalAnswerStep) + assert result[2].output == "" def test_run_with_final_answer_error(core_agent_instance): @@ -209,9 +346,9 @@ def test_run_with_final_answer_error(core_agent_instance): task = "test task" max_steps = 3 - # Mock _execute_step to raise FinalAnswerError - with patch.object(core_agent_instance, '_execute_step', - side_effect=core_agent_module.FinalAnswerError()) as mock_execute_step, \ + # Mock _step_stream to raise FinalAnswerError + with patch.object(core_agent_instance, '_step_stream', + side_effect=core_agent_module.FinalAnswerError()) as mock_step_stream, \ patch.object(core_agent_instance, '_finalize_step'): # Execute result = list(core_agent_instance._run_stream(task, max_steps)) @@ -219,8 +356,8 @@ def test_run_with_final_answer_error(core_agent_instance): # Assertions # When FinalAnswerError occurs, it should yield action step + final answer step assert len(result) == 2 - assert isinstance(result[0], MagicMock) # Action step - assert isinstance(result[1], MagicMock) # Final answer step + assert isinstance(result[0], MockActionStep) # Action step + assert isinstance(result[1], MockFinalAnswerStep) # Final answer step def test_run_with_final_answer_error_and_model_output(core_agent_instance): @@ -229,16 +366,12 @@ def test_run_with_final_answer_error_and_model_output(core_agent_instance): task = "test task" max_steps = 3 - # Create a mock action step with model_output - mock_action_step = MagicMock() - mock_action_step.model_output = "```\nprint('hello')\n```" - - # Mock _execute_step to set model_output and then raise FinalAnswerError - def mock_execute_step(action_step): + # Mock _step_stream to set model_output and then raise FinalAnswerError + def mock_step_stream(action_step): action_step.model_output = "```\nprint('hello')\n```" raise core_agent_module.FinalAnswerError() - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_step), \ + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream), \ patch.object(core_agent_module, 'convert_code_format', return_value="```python\nprint('hello')\n```") as mock_convert, \ patch.object(core_agent_instance, '_finalize_step'): # Execute @@ -246,8 +379,8 @@ def mock_execute_step(action_step): # Assertions assert len(result) == 2 - 
assert isinstance(result[0], MagicMock) # Action step - assert isinstance(result[1], MagicMock) # Final answer step + assert isinstance(result[0], MockActionStep) # Action step + assert isinstance(result[1], MockFinalAnswerStep) # Final answer step # Verify convert_code_format was called mock_convert.assert_called_once_with( "```\nprint('hello')\n```") @@ -259,9 +392,9 @@ def test_run_with_agent_error_updated(core_agent_instance): task = "test task" max_steps = 3 - # Mock _execute_step to raise AgentError - with patch.object(core_agent_instance, '_execute_step', - side_effect=MockAgentError("test error")) as mock_execute_step, \ + # Mock _step_stream to raise AgentError + with patch.object(core_agent_instance, '_step_stream', + side_effect=MockAgentError("test error")) as mock_step_stream, \ patch.object(core_agent_instance, '_finalize_step'): # Execute result = list(core_agent_instance._run_stream(task, max_steps)) @@ -270,9 +403,9 @@ def test_run_with_agent_error_updated(core_agent_instance): # When AgentError occurs, it should yield action step + final answer step # But the error causes the loop to continue, so we get multiple action steps assert len(result) >= 2 - assert isinstance(result[0], MagicMock) # Action step with error + assert isinstance(result[0], MockActionStep) # Action step with error # Last item should be final answer step - assert isinstance(result[-1], MagicMock) # Final answer step + assert isinstance(result[-1], MockFinalAnswerStep) # Final answer step def test_run_with_agent_parse_error_branch_updated(core_agent_instance): @@ -280,25 +413,40 @@ def test_run_with_agent_parse_error_branch_updated(core_agent_instance): task = "parse task" max_steps = 1 - # Mock _execute_step to set model_output and then raise FinalAnswerError - def mock_execute_step(action_step): + # Mock _step_stream to set model_output and then raise FinalAnswerError + def mock_step_stream(action_step): action_step.model_output = "```\nprint('hello')\n```" raise core_agent_module.FinalAnswerError() - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_step), \ + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream), \ patch.object(core_agent_module, 'convert_code_format', return_value="```python\nprint('hello')\n```") as mock_convert, \ patch.object(core_agent_instance, '_finalize_step'): results = list(core_agent_instance._run_stream(task, max_steps)) # _run should yield action step + final answer step assert len(results) == 2 - assert isinstance(results[0], MagicMock) # Action step - assert isinstance(results[1], MagicMock) # Final answer step + assert isinstance(results[0], MockActionStep) # Action step + assert isinstance(results[1], MockFinalAnswerStep) # Final answer step # Verify convert_code_format was called mock_convert.assert_called_once_with( "```\nprint('hello')\n```") +def test_run_stream_validates_final_answer_when_checks_enabled(core_agent_instance): + """Ensure _run_stream triggers final answer validation when checks are configured.""" + task = "validate task" + core_agent_instance.final_answer_checks = ["non-empty"] + core_agent_instance._validate_final_answer = MagicMock() + + def mock_step_stream(action_step): + yield MockActionOutput(output="final answer", is_final_answer=True) + + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream), \ + patch.object(core_agent_instance, '_finalize_step'): + result = list(core_agent_instance._run_stream(task, max_steps=1)) + + assert len(result) == 3 # 
ActionOutput, ActionStep, FinalAnswerStep + core_agent_instance._validate_final_answer.assert_called_once_with("final answer") def test_convert_code_format_display_replacements(): """Validate convert_code_format correctly transforms format to standard markdown.""" @@ -575,6 +723,10 @@ def test_step_stream_parse_success(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -586,7 +738,7 @@ def test_step_stream_parse_success(core_agent_instance): return_value=[]) core_agent_instance.model = MagicMock(return_value=mock_chat_message) core_agent_instance.python_executor = MagicMock( - return_value=("output", "logs", False)) + return_value=MockCodeOutput(output="output", logs="logs", is_final_answer=False)) # Execute list(core_agent_instance._step_stream(mock_memory_step)) @@ -599,6 +751,33 @@ def test_step_stream_parse_success(core_agent_instance): assert hasattr(mock_memory_step.tool_calls[0], 'arguments') +def test_step_stream_structured_outputs_with_stop_sequence(core_agent_instance): + """Ensure _step_stream handles structured outputs correctly.""" + mock_memory_step = MagicMock() + mock_chat_message = MagicMock() + mock_chat_message.content = json.dumps({"code": "print('hello')"}) + mock_chat_message.token_usage = MagicMock() + + core_agent_instance.agent_name = "test_agent" + core_agent_instance.step_number = 1 + core_agent_instance._use_structured_outputs_internally = True + core_agent_instance.code_block_tags = ["<>", "[CLOSE]"] + core_agent_instance.write_memory_to_messages = MagicMock(return_value=[]) + core_agent_instance.model = MagicMock(return_value=mock_chat_message) + core_agent_instance.python_executor = MagicMock( + return_value=MockCodeOutput(output="result", logs="", is_final_answer=False) + ) + + with patch.object(core_agent_module, 'extract_code_from_text', return_value="print('hello')") as mock_extract, \ + patch.object(core_agent_module, 'fix_final_answer_code', side_effect=lambda code: code): + list(core_agent_instance._step_stream(mock_memory_step)) + + # Ensure structured output helpers were used + mock_extract.assert_called_once_with("print('hello')", core_agent_instance.code_block_tags) + call_kwargs = core_agent_instance.model.call_args.kwargs + assert call_kwargs["response_format"] == core_agent_module.CODEAGENT_RESPONSE_FORMAT + + def test_step_stream_skips_execution_for_display_only(core_agent_instance): """Test that _step_stream raises FinalAnswerError when only DISPLAY code blocks are present.""" # Setup @@ -611,6 +790,10 @@ def test_step_stream_skips_execution_for_display_only(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -637,6 +820,10 @@ def test_step_stream_parse_failure_raises_final_answer_error(core_agent_instance core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = 
MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -662,6 +849,10 @@ def test_step_stream_model_generation_error(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -687,6 +878,10 @@ def test_step_stream_execution_success(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -698,14 +893,16 @@ def test_step_stream_execution_success(core_agent_instance): return_value=[]) core_agent_instance.model = MagicMock(return_value=mock_chat_message) core_agent_instance.python_executor = MagicMock( - return_value=("Hello World", "Execution logs", False)) + return_value=MockCodeOutput(output="Hello World", logs="Execution logs", is_final_answer=False)) # Execute result = list(core_agent_instance._step_stream(mock_memory_step)) # Assertions - # Should yield None when is_final_answer is False - assert result[0] is None + # Should yield ActionOutput when is_final_answer is False + assert len(result) == 1 + assert isinstance(result[0], MockActionOutput) + assert result[0].is_final_answer is False assert mock_memory_step.observations is not None # Check that observations was set (we can't easily test the exact content due to mock behavior) assert hasattr(mock_memory_step, 'observations') @@ -723,6 +920,10 @@ def test_step_stream_execution_final_answer(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -734,13 +935,16 @@ def test_step_stream_execution_final_answer(core_agent_instance): return_value=[]) core_agent_instance.model = MagicMock(return_value=mock_chat_message) core_agent_instance.python_executor = MagicMock( - return_value=("final answer", "Execution logs", True)) + return_value=MockCodeOutput(output="final answer", logs="Execution logs", is_final_answer=True)) # Execute result = list(core_agent_instance._step_stream(mock_memory_step)) # Assertions - assert result[0] == "final answer" # Should yield the final answer + assert len(result) == 1 + assert isinstance(result[0], MockActionOutput) + assert result[0].is_final_answer is True + assert result[0].output == "final answer" def test_step_stream_execution_error(core_agent_instance): @@ -755,6 +959,10 @@ def test_step_stream_execution_error(core_agent_instance): core_agent_instance.step_number = 1 
core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -795,6 +1003,10 @@ def test_step_stream_observer_calls(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -806,7 +1018,7 @@ def test_step_stream_observer_calls(core_agent_instance): return_value=[]) core_agent_instance.model = MagicMock(return_value=mock_chat_message) core_agent_instance.python_executor = MagicMock( - return_value=("test", "logs", False)) + return_value=MockCodeOutput(output="test", logs="logs", is_final_answer=False)) # Execute list(core_agent_instance._step_stream(mock_memory_step)) @@ -847,6 +1059,10 @@ def test_step_stream_execution_with_logs(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -859,14 +1075,16 @@ def test_step_stream_execution_with_logs(core_agent_instance): core_agent_instance.model = MagicMock(return_value=mock_chat_message) # Mock python_executor to return logs core_agent_instance.python_executor = MagicMock( - return_value=("output", "Some execution logs", False)) + return_value=MockCodeOutput(output="output", logs="Some execution logs", is_final_answer=False)) # Execute result = list(core_agent_instance._step_stream(mock_memory_step)) # Assertions - # Should yield None when is_final_answer is False - assert result[0] is None + # Should yield ActionOutput when is_final_answer is False + assert len(result) == 1 + assert isinstance(result[0], MockActionOutput) + assert result[0].is_final_answer is False # Check that execution logs were recorded assert core_agent_instance.observer.add_message.call_count >= 3 calls = core_agent_instance.observer.add_message.call_args_list @@ -887,6 +1105,10 @@ def test_step_stream_execution_error_with_print_outputs(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -926,6 +1148,10 @@ def test_step_stream_execution_error_with_import_warning(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = 
MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -969,6 +1195,10 @@ def test_step_stream_execution_error_without_print_outputs(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -1003,6 +1233,10 @@ def test_step_stream_execution_with_none_output(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -1015,14 +1249,16 @@ def test_step_stream_execution_with_none_output(core_agent_instance): core_agent_instance.model = MagicMock(return_value=mock_chat_message) # Mock python_executor to return None output core_agent_instance.python_executor = MagicMock( - return_value=(None, "Execution logs", False)) + return_value=MockCodeOutput(output=None, logs="Execution logs", is_final_answer=False)) # Execute result = list(core_agent_instance._step_stream(mock_memory_step)) # Assertions - # Should yield None when is_final_answer is False - assert result[0] is None + # Should yield ActionOutput when is_final_answer is False + assert len(result) == 1 + assert isinstance(result[0], MockActionOutput) + assert result[0].is_final_answer is False assert mock_memory_step.observations is not None # Check that observations was set but should not contain "Last output from code snippet" # since output is None @@ -1050,6 +1286,10 @@ def test_run_with_additional_args(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1059,8 +1299,7 @@ def test_run_with_additional_args(core_agent_instance): core_agent_instance.observer = MagicMock() # Mock _run_stream to return a simple result - mock_final_step = MagicMock() - mock_final_step.final_answer = "final result" + mock_final_step = MockFinalAnswerStep(output="final result") with patch.object(core_agent_instance, '_run_stream', return_value=[mock_final_step]): # Execute @@ -1089,6 +1328,10 @@ def test_run_with_stream_true(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1123,6 +1366,10 @@ def 
test_run_with_reset_false(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1132,8 +1379,7 @@ def test_run_with_reset_false(core_agent_instance): core_agent_instance.observer = MagicMock() # Mock _run_stream to return a simple result - mock_final_step = MagicMock() - mock_final_step.final_answer = "final result" + mock_final_step = MockFinalAnswerStep(output="final result") with patch.object(core_agent_instance, '_run_stream', return_value=[mock_final_step]): # Execute @@ -1162,6 +1408,10 @@ def test_run_with_images(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1171,8 +1421,7 @@ def test_run_with_images(core_agent_instance): core_agent_instance.observer = MagicMock() # Mock _run_stream to return a simple result - mock_final_step = MagicMock() - mock_final_step.final_answer = "final result" + mock_final_step = MockFinalAnswerStep(output="final result") with patch.object(core_agent_instance, '_run_stream', return_value=[mock_final_step]): # Execute @@ -1185,8 +1434,89 @@ def test_run_with_images(core_agent_instance): call_args = core_agent_instance.memory.steps.append.call_args[0][0] # The TaskStep is mocked, so just verify it was called with correct arguments via the constructor # We'll check that TaskStep was called with the right parameters - mock_smolagents.memory.TaskStep.assert_called_with( - task=task, task_images=images) + assert isinstance(call_args, MockTaskStep) + assert call_args.task == task + assert call_args.task_images == images + + +def test_run_return_full_result_success_state(core_agent_instance): + """run should return RunResult with aggregated token usage when requested.""" + task = "test task" + token_usage = MagicMock(input_tokens=7, output_tokens=3) + action_step = core_agent_module.ActionStep() + action_step.token_usage = token_usage + + core_agent_instance.name = "test_agent" + core_agent_instance.memory.steps = [action_step] + core_agent_instance.memory.get_full_steps = MagicMock(return_value=[{"step": "data"}]) + core_agent_instance.memory.reset = MagicMock() + core_agent_instance.monitor.reset = MagicMock() + core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() + core_agent_instance.model = MagicMock() + core_agent_instance.model.model_id = "model" + core_agent_instance.python_executor = MagicMock() + core_agent_instance.python_executor.send_variables = MagicMock() + core_agent_instance.python_executor.send_tools = MagicMock() + core_agent_instance.observer = MagicMock() + + final_step = 
MockFinalAnswerStep(output="final result") + with patch.object(core_agent_instance, '_run_stream', return_value=[final_step]): + result = core_agent_instance.run(task, return_full_result=True) + + assert isinstance(result, core_agent_module.RunResult) + assert result.output == "final result" + core_agent_module.TokenUsage.assert_called_once_with(input_tokens=7, output_tokens=3) + assert result.token_usage == core_agent_module.TokenUsage.return_value + assert result.state == "success" + core_agent_instance.memory.get_full_steps.assert_called_once() + + +def test_run_return_full_result_max_steps_error(core_agent_instance): + """run should mark state as max_steps_error when the last step contains AgentMaxStepsError.""" + task = "test task" + + action_step = core_agent_module.ActionStep() + action_step.token_usage = None + action_step.error = core_agent_module.AgentMaxStepsError("max steps reached") + + class StepsList(list): + def append(self, item): + # Skip storing TaskStep to keep action_step as the last element + if isinstance(item, core_agent_module.TaskStep): + return + super().append(item) + + core_agent_instance.name = "test_agent" + steps_list = StepsList([action_step]) + core_agent_instance.memory.steps = steps_list + core_agent_instance.memory.get_full_steps = MagicMock(return_value=[{"step": "data"}]) + core_agent_instance.memory.reset = MagicMock() + core_agent_instance.monitor.reset = MagicMock() + core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() + core_agent_instance.model = MagicMock() + core_agent_instance.model.model_id = "model" + core_agent_instance.python_executor = MagicMock() + core_agent_instance.python_executor.send_variables = MagicMock() + core_agent_instance.python_executor.send_tools = MagicMock() + core_agent_instance.observer = MagicMock() + + final_step = MockFinalAnswerStep(output="final result") + with patch.object(core_agent_instance, '_run_stream', return_value=[final_step]): + result = core_agent_instance.run(task, return_full_result=True) + + assert isinstance(result, core_agent_module.RunResult) + assert result.token_usage is None + core_agent_module.TokenUsage.assert_not_called() + assert result.state == "max_steps_error" + core_agent_instance.memory.get_full_steps.assert_called_once() def test_run_without_python_executor(core_agent_instance): @@ -1204,6 +1534,10 @@ def test_run_without_python_executor(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1213,8 +1547,7 @@ def test_run_without_python_executor(core_agent_instance): core_agent_instance.observer = MagicMock() # Mock _run_stream to return a simple result - mock_final_step = MagicMock() - mock_final_step.final_answer = "final result" + mock_final_step = MockFinalAnswerStep(output="final result") with patch.object(core_agent_instance, '_run_stream', return_value=[mock_final_step]): # Execute @@ -1267,6 +1600,31 @@ def test_call_method_success(core_agent_instance): 
"test_agent", ProcessType.AGENT_FINISH, "test result") +def test_call_method_with_run_result_return(core_agent_instance): + """Test __call__ handles RunResult by extracting its output.""" + task = "test task" + core_agent_instance.name = "test_agent" + core_agent_instance.state = {} + core_agent_instance.prompt_templates = { + "managed_agent": { + "task": "Task: {{task}}", + "report": "Report: {{final_answer}}" + } + } + core_agent_instance.provide_run_summary = False + core_agent_instance.observer = MagicMock() + + run_result = core_agent_module.RunResult(output="run result", token_usage=None, steps=[], timing=None, state="success") + with patch.object(core_agent_instance, 'run', return_value=run_result) as mock_run: + result = core_agent_instance(task) + + assert "Report: run result" in result + mock_run.assert_called_once() + core_agent_instance.observer.add_message.assert_called_with( + "test_agent", ProcessType.AGENT_FINISH, "run result" + ) + + def test_call_method_with_run_summary(core_agent_instance): """Test __call__ method with provide_run_summary=True.""" # Setup @@ -1284,10 +1642,14 @@ def test_call_method_with_run_summary(core_agent_instance): core_agent_instance.provide_run_summary = True core_agent_instance.observer = MagicMock() - # Mock write_memory_to_messages to return some simple messages + # Mock write_memory_to_messages to return some simple messages with .content attribute + class MockMessage: + def __init__(self, content): + self.content = content + mock_messages = [ - {"content": "msg1"}, - {"content": "msg2"} + MockMessage("msg1"), + MockMessage("msg2") ] core_agent_instance.write_memory_to_messages = MagicMock( return_value=mock_messages) diff --git a/test/sdk/core/agents/test_nexent_agent.py b/test/sdk/core/agents/test_nexent_agent.py index 3dc831323..2a842ea72 100644 --- a/test/sdk/core/agents/test_nexent_agent.py +++ b/test/sdk/core/agents/test_nexent_agent.py @@ -27,11 +27,16 @@ class _ActionStep: - pass + def __init__(self, step_number=None, timing=None, action_output=None, model_output=None): + self.step_number = step_number + self.timing = timing + self.action_output = action_output + self.model_output = model_output class _TaskStep: - pass + def __init__(self, task=None): + self.task = task class _AgentText: @@ -214,6 +219,8 @@ class _MockToolSign: "nexent.storage": mock_nexent_storage_module, "nexent.multi_modal": mock_nexent_multi_modal_module, "nexent.multi_modal.load_save_object": mock_nexent_load_save_module, + # Mock tiktoken to avoid importing the real package when models import it + "tiktoken": MagicMock(), # Mock the OpenAIModel import "sdk.nexent.core.models.openai_llm": MagicMock(OpenAIModel=mock_openai_model_class), # Mock CoreAgent import @@ -230,7 +237,7 @@ class _MockToolSign: from sdk.nexent.core.utils.observer import MessageObserver, ProcessType from sdk.nexent.core.agents import nexent_agent from sdk.nexent.core.agents.nexent_agent import NexentAgent, ActionStep, TaskStep - from sdk.nexent.core.agents.agent_model import ToolConfig, ModelConfig, AgentConfig + from sdk.nexent.core.agents.agent_model import ToolConfig, ModelConfig, AgentConfig, AgentHistory # ---------------------------------------------------------------------------- @@ -1087,6 +1094,48 @@ def test_add_history_to_agent_none_history(nexent_agent_instance, mock_core_agen assert len(mock_core_agent.memory.steps) == 0 +def test_add_history_to_agent_user_and_assistant_history(nexent_agent_instance, mock_core_agent): + """Test add_history_to_agent correctly converts user and 
diff --git a/test/sdk/core/agents/test_nexent_agent.py b/test/sdk/core/agents/test_nexent_agent.py
index 3dc831323..2a842ea72 100644
--- a/test/sdk/core/agents/test_nexent_agent.py
+++ b/test/sdk/core/agents/test_nexent_agent.py
@@ -27,11 +27,16 @@
 class _ActionStep:
-    pass
+    def __init__(self, step_number=None, timing=None, action_output=None, model_output=None):
+        self.step_number = step_number
+        self.timing = timing
+        self.action_output = action_output
+        self.model_output = model_output


 class _TaskStep:
-    pass
+    def __init__(self, task=None):
+        self.task = task


 class _AgentText:
@@ -214,6 +219,8 @@ class _MockToolSign:
     "nexent.storage": mock_nexent_storage_module,
     "nexent.multi_modal": mock_nexent_multi_modal_module,
     "nexent.multi_modal.load_save_object": mock_nexent_load_save_module,
+    # Mock tiktoken to avoid importing the real package when models import it
+    "tiktoken": MagicMock(),
     # Mock the OpenAIModel import
     "sdk.nexent.core.models.openai_llm": MagicMock(OpenAIModel=mock_openai_model_class),
     # Mock CoreAgent import
@@ -230,7 +237,7 @@ class _MockToolSign:
     from sdk.nexent.core.utils.observer import MessageObserver, ProcessType
     from sdk.nexent.core.agents import nexent_agent
     from sdk.nexent.core.agents.nexent_agent import NexentAgent, ActionStep, TaskStep
-    from sdk.nexent.core.agents.agent_model import ToolConfig, ModelConfig, AgentConfig
+    from sdk.nexent.core.agents.agent_model import ToolConfig, ModelConfig, AgentConfig, AgentHistory


 # ----------------------------------------------------------------------------
@@ -1087,6 +1094,48 @@ def test_add_history_to_agent_none_history(nexent_agent_instance, mock_core_agen
     assert len(mock_core_agent.memory.steps) == 0


+def test_add_history_to_agent_user_and_assistant_history(nexent_agent_instance, mock_core_agent):
+    """Test add_history_to_agent correctly converts user and assistant messages to memory steps."""
+    nexent_agent_instance.agent = mock_core_agent
+
+    user_msg = AgentHistory(role="user", content="User question")
+    assistant_msg = AgentHistory(role="assistant", content="Assistant reply")
+
+    nexent_agent_instance.add_history_to_agent([user_msg, assistant_msg])
+
+    mock_core_agent.memory.reset.assert_called_once()
+    assert len(mock_core_agent.memory.steps) == 2
+
+    # First step should be a TaskStep for the user message
+    first_step = mock_core_agent.memory.steps[0]
+    assert isinstance(first_step, TaskStep)
+    assert first_step.task == "User question"
+
+    # Second step should be an ActionStep for the assistant message
+    second_step = mock_core_agent.memory.steps[1]
+    assert isinstance(second_step, ActionStep)
+    assert second_step.action_output == "Assistant reply"
+    assert second_step.model_output == "Assistant reply"
+
+
+def test_add_history_to_agent_invalid_agent_type(nexent_agent_instance):
+    """Test add_history_to_agent raises TypeError when agent is not a CoreAgent."""
+    nexent_agent_instance.agent = "not_core_agent"
+
+    with pytest.raises(TypeError, match="agent must be a CoreAgent object"):
+        nexent_agent_instance.add_history_to_agent([])
+
+
+def test_add_history_to_agent_invalid_history_items(nexent_agent_instance, mock_core_agent):
+    """Test add_history_to_agent raises TypeError when history items are not AgentHistory."""
+    nexent_agent_instance.agent = mock_core_agent
+
+    invalid_history = [{"role": "user", "content": "hello"}]
+
+    with pytest.raises(TypeError, match="history must be a list of AgentHistory objects"):
+        nexent_agent_instance.add_history_to_agent(invalid_history)
+
+
 def test_agent_run_with_observer_success_with_agent_text(nexent_agent_instance, mock_core_agent):
     """Test successful agent_run_with_observer with AgentText final answer."""
     # Setup
@@ -1103,7 +1152,7 @@ def test_agent_run_with_observer_success_with_agent_text(nexent_agent_instance,
         "Final answer with thinking content")

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = mock_final_answer
+    mock_core_agent.run.return_value[-1].output = mock_final_answer

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
@@ -1129,7 +1178,7 @@ def test_agent_run_with_observer_success_with_string_final_answer(nexent_agent_i
     mock_action_step.error = None

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "String final answer with thinking"
+    mock_core_agent.run.return_value[-1].output = "String final answer with thinking"

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
@@ -1153,7 +1202,7 @@ def test_agent_run_with_observer_with_error_in_step(nexent_agent_instance, mock_
     mock_action_step.error = "Test error occurred"

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "Final answer"
+    mock_core_agent.run.return_value[-1].output = "Final answer"

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
@@ -1176,7 +1225,7 @@ def test_agent_run_with_observer_skips_non_action_step(nexent_agent_instance, mo
     mock_action_step.error = None

     mock_core_agent.run.return_value = [mock_task_step, mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "Final answer"
+    mock_core_agent.run.return_value[-1].output = "Final answer"

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
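The add_history_to_agent tests earlier in this hunk fix the conversion contract: a user turn becomes a TaskStep, an assistant turn becomes an ActionStep whose action_output and model_output both hold the reply, and a non-CoreAgent agent or non-AgentHistory items raise TypeError. A condensed sketch of a method that satisfies those assertions; step numbering and any extra bookkeeping in the real nexent_agent.py are simplified away:

def add_history_to_agent(self, history):
    # Sketch only: mirrors the expectations encoded in the tests above.
    if not isinstance(self.agent, CoreAgent):
        raise TypeError("agent must be a CoreAgent object")
    if not history:
        return
    if not all(isinstance(item, AgentHistory) for item in history):
        raise TypeError("history must be a list of AgentHistory objects")

    self.agent.memory.reset()
    for item in history:
        if item.role == "user":
            self.agent.memory.steps.append(TaskStep(task=item.content))
        elif item.role == "assistant":
            self.agent.memory.steps.append(
                ActionStep(step_number=-1,  # placeholder; the real code tracks step numbers
                           action_output=item.content,
                           model_output=item.content))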
@@ -1199,7 +1248,7 @@ def test_agent_run_with_observer_with_stop_event_set(nexent_agent_instance, mock
     mock_action_step.error = None

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "Final answer"
+    mock_core_agent.run.return_value[-1].output = "Final answer"

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
@@ -1226,6 +1275,14 @@ def test_agent_run_with_observer_with_exception(nexent_agent_instance, mock_core
     )
+
+def test_agent_run_with_observer_invalid_agent_type(nexent_agent_instance):
+    """Test agent_run_with_observer raises TypeError when agent is not a CoreAgent."""
+    nexent_agent_instance.agent = "not_core_agent"
+
+    with pytest.raises(TypeError, match="agent must be a CoreAgent object"):
+        nexent_agent_instance.agent_run_with_observer("test query")
+
+
 def test_agent_run_with_observer_with_reset_false(nexent_agent_instance, mock_core_agent):
     """Test agent_run_with_observer with reset=False parameter."""
     # Setup
@@ -1238,7 +1295,7 @@ def test_agent_run_with_observer_with_reset_false(nexent_agent_instance, mock_co
     mock_action_step.error = None

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "Final answer"
+    mock_core_agent.run.return_value[-1].output = "Final answer"

     # Execute with reset=False
     nexent_agent_instance.agent_run_with_observer("test query", reset=False)
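Taken together, the agent_run_with_observer tests assume a loop of roughly this shape: validate the agent type, stream steps from run, skip anything that is not an ActionStep, note per-step errors, stop early when the stop event is set, and read the final answer from the last step's .output (an AgentText or a plain string). The sketch below is illustrative only; the observer message types and exact signatures are not shown in this diff:

def agent_run_with_observer(self, query, reset=True):
    # Sketch of the flow implied by the tests; error and final-answer reporting through
    # self.observer is elided because its message types do not appear in this diff.
    if not isinstance(self.agent, CoreAgent):
        raise TypeError("agent must be a CoreAgent object")

    last_step = None
    step_errors = []
    for step in self.agent.run(query, stream=True, reset=reset):
        last_step = step
        if self.stop_event.is_set():
            break  # caller requested a stop; keep whatever has been produced so far
        if not isinstance(step, ActionStep):
            continue  # task/planning steps are not reported individually
        if step.error:
            step_errors.append(step.error)  # forwarded through self.observer in the real code

    final_answer = getattr(last_step, "output", None)
    text = final_answer.to_string() if isinstance(final_answer, AgentText) else str(final_answer)
    return text  # the real method pushes this through self.observer instead of returning it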
if _sub == "agents": - for _name in ["CodeAgent", "populate_template", "handle_agent_output_types", "AgentError", "AgentType"]: + for _name in ["CodeAgent", "populate_template", "handle_agent_output_types", "AgentError", "AgentType", "ActionOutput", "RunResult"]: setattr(sub_mod, _name, MagicMock(name=f"smolagents.agents.{_name}")) elif _sub == "local_python_executor": setattr(sub_mod, "fix_final_answer_code", MagicMock(name="fix_final_answer_code")) @@ -59,6 +59,7 @@ def from_mcp(cls, *args, **kwargs): # pylint: disable=unused-argument elif _sub == "models": setattr(sub_mod, "ChatMessage", MagicMock(name="smolagents.models.ChatMessage")) setattr(sub_mod, "MessageRole", MagicMock(name="smolagents.models.MessageRole")) + setattr(sub_mod, "CODEAGENT_RESPONSE_FORMAT", MagicMock(name="smolagents.models.CODEAGENT_RESPONSE_FORMAT")) # Provide a simple base class so that OpenAIModel can inherit from it class _DummyOpenAIServerModel: def __init__(self, *args, **kwargs): @@ -67,13 +68,18 @@ def __init__(self, *args, **kwargs): setattr(sub_mod, "OpenAIServerModel", _DummyOpenAIServerModel) elif _sub == "monitoring": setattr(sub_mod, "LogLevel", MagicMock(name="smolagents.monitoring.LogLevel")) + setattr(sub_mod, "Timing", MagicMock(name="smolagents.monitoring.Timing")) + setattr(sub_mod, "YELLOW_HEX", MagicMock(name="smolagents.monitoring.YELLOW_HEX")) + setattr(sub_mod, "TokenUsage", MagicMock(name="smolagents.monitoring.TokenUsage")) elif _sub == "utils": for _name in [ "AgentExecutionError", "AgentGenerationError", "AgentParsingError", + "AgentMaxStepsError", "parse_code_blobs", "truncate_content", + "extract_code_from_text", ]: setattr(sub_mod, _name, MagicMock(name=f"smolagents.utils.{_name}")) setattr(mock_smolagents, _sub, sub_mod) @@ -82,6 +88,8 @@ def __init__(self, *args, **kwargs): # Top-level exports expected directly from `smolagents` by nexent_agent.py for _name in ["ActionStep", "TaskStep", "AgentText", "handle_agent_output_types"]: setattr(mock_smolagents, _name, MagicMock(name=f"smolagents.{_name}")) +# Export Timing from monitoring submodule to top-level +setattr(mock_smolagents, "Timing", mock_smolagents.monitoring.Timing) # Also export Tool at top-level so that `from smolagents import Tool` works setattr(mock_smolagents, "Tool", mock_smolagents_tool_cls) @@ -237,9 +245,9 @@ def test_agent_run_thread_local_flow(basic_agent_run_info, monkeypatch): def test_agent_run_thread_mcp_flow(basic_agent_run_info, mock_memory_context, monkeypatch): - """Verify behaviour when an MCP host list is provided.""" - # Give the AgentRunInfo an MCP host list - basic_agent_run_info.mcp_host = ["http://mcp.server"] + """Verify behaviour when an MCP host list is provided with auto-detected transport.""" + # Give the AgentRunInfo an MCP host list (string format, auto-detect transport) + basic_agent_run_info.mcp_host = ["http://mcp.server/mcp"] # Prepare ToolCollection.from_mcp to return a context manager mock_tool_collection = MagicMock(name="ToolCollectionInstance") @@ -257,7 +265,7 @@ def test_agent_run_thread_mcp_flow(basic_agent_run_info, mock_memory_context, mo basic_agent_run_info.observer.add_message.assert_any_call("", ProcessType.AGENT_NEW_RUN, "") # ToolCollection.from_mcp should be called with the expected client list and trust_remote_code=True - expected_client_list = [{"url": "http://mcp.server"}] + expected_client_list = [{"url": "http://mcp.server/mcp", "transport": "streamable-http"}] run_agent.ToolCollection.from_mcp.assert_called_once_with(expected_client_list, trust_remote_code=True) # 
@@ -275,6 +283,116 @@ def test_agent_run_thread_mcp_flow(basic_agent_run_info, mock_memory_context, mo
     mock_nexent_instance.agent_run_with_observer.assert_called_once_with(query=basic_agent_run_info.query, reset=False)
+
+def test_agent_run_thread_mcp_flow_with_explicit_transport(basic_agent_run_info, mock_memory_context, monkeypatch):
+    """Verify behaviour when MCP host is provided with explicit transport in dict format."""
+    # Give the AgentRunInfo an MCP host list with explicit transport
+    basic_agent_run_info.mcp_host = [{"url": "http://mcp.server", "transport": "sse"}]
+
+    # Prepare ToolCollection.from_mcp to return a context manager
+    mock_tool_collection = MagicMock(name="ToolCollectionInstance")
+    mock_context_manager = MagicMock(__enter__=MagicMock(return_value=mock_tool_collection), __exit__=MagicMock(return_value=None))
+    monkeypatch.setattr(run_agent.ToolCollection, "from_mcp", MagicMock(return_value=mock_context_manager))
+
+    # Patch NexentAgent
+    mock_nexent_instance = MagicMock(name="NexentAgentInstance")
+    monkeypatch.setattr(run_agent, "NexentAgent", MagicMock(return_value=mock_nexent_instance))
+
+    # Execute
+    run_agent.agent_run_thread(basic_agent_run_info)
+
+    # ToolCollection.from_mcp should be called with the expected client list
+    expected_client_list = [{"url": "http://mcp.server", "transport": "sse"}]
+    run_agent.ToolCollection.from_mcp.assert_called_once_with(expected_client_list, trust_remote_code=True)
+
+
+def test_agent_run_thread_mcp_flow_mixed_formats(basic_agent_run_info, mock_memory_context, monkeypatch):
+    """Verify behaviour when MCP host list contains both string and dict formats."""
+    # Mix of string (auto-detect) and dict (explicit) formats
+    basic_agent_run_info.mcp_host = [
+        "http://mcp1.server/mcp",  # Auto-detect: streamable-http
+        "http://mcp2.server/sse",  # Auto-detect: sse
+        {"url": "http://mcp3.server/mcp", "transport": "streamable-http"},  # Explicit: streamable-http
+    ]
+
+    # Prepare ToolCollection.from_mcp to return a context manager
+    mock_tool_collection = MagicMock(name="ToolCollectionInstance")
+    mock_context_manager = MagicMock(__enter__=MagicMock(return_value=mock_tool_collection), __exit__=MagicMock(return_value=None))
+    monkeypatch.setattr(run_agent.ToolCollection, "from_mcp", MagicMock(return_value=mock_context_manager))
+
+    # Patch NexentAgent
+    mock_nexent_instance = MagicMock(name="NexentAgentInstance")
+    monkeypatch.setattr(run_agent, "NexentAgent", MagicMock(return_value=mock_nexent_instance))
+
+    # Execute
+    run_agent.agent_run_thread(basic_agent_run_info)
+
+    # ToolCollection.from_mcp should be called with normalized client list
+    expected_client_list = [
+        {"url": "http://mcp1.server/mcp", "transport": "streamable-http"},
+        {"url": "http://mcp2.server/sse", "transport": "sse"},
+        {"url": "http://mcp3.server/mcp", "transport": "streamable-http"},
+    ]
+    run_agent.ToolCollection.from_mcp.assert_called_once_with(expected_client_list, trust_remote_code=True)
+
+
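The test_detect_transport and test_normalize_mcp_config cases that follow spell out the helper contracts: URLs ending in /sse select the sse transport, everything else (including /mcp) falls back to streamable-http, dicts must carry a "url" key, and only sse/streamable-http are accepted. A sketch that satisfies those assertions; the real helpers in run_agent.py may be written differently:

from typing import Any, Dict, Union

_VALID_TRANSPORTS = {"sse", "streamable-http"}


def _detect_transport(url: str) -> str:
    # /sse endpoints use SSE; /mcp and anything else default to streamable-http.
    return "sse" if url.rstrip("/").endswith("/sse") else "streamable-http"


def _normalize_mcp_config(item: Union[str, Dict[str, Any]]) -> Dict[str, str]:
    # Strings get an auto-detected transport; dicts are validated and completed.
    if isinstance(item, str):
        return {"url": item, "transport": _detect_transport(item)}
    if isinstance(item, dict):
        if "url" not in item:
            raise ValueError("MCP host dict must contain 'url' key")
        transport = item.get("transport") or _detect_transport(item["url"])
        if transport not in _VALID_TRANSPORTS:
            raise ValueError(f"Invalid transport type: {transport}")
        return {"url": item["url"], "transport": transport}
    raise ValueError(f"Invalid MCP host item type: {type(item)!r}")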
+def test_detect_transport():
+    """Test transport auto-detection logic based on URL ending."""
+    # Test URLs ending with /sse
+    assert run_agent._detect_transport("http://server/sse") == "sse"
+    assert run_agent._detect_transport("https://api.example.com/sse") == "sse"
+    assert run_agent._detect_transport("http://localhost:3000/sse") == "sse"
+
+    # Test URLs ending with /mcp
+    assert run_agent._detect_transport("http://server/mcp") == "streamable-http"
+    assert run_agent._detect_transport("https://api.example.com/mcp") == "streamable-http"
+    assert run_agent._detect_transport("http://localhost:3000/mcp") == "streamable-http"
+
+    # Test default fallback (no /sse or /mcp ending)
+    assert run_agent._detect_transport("http://server") == "streamable-http"
+    assert run_agent._detect_transport("https://api.example.com") == "streamable-http"
+    assert run_agent._detect_transport("http://server/other") == "streamable-http"
+
+
+def test_normalize_mcp_config():
+    """Test MCP configuration normalization."""
+    # Test string format (auto-detect based on URL ending)
+    result = run_agent._normalize_mcp_config("http://server/mcp")
+    assert result == {"url": "http://server/mcp", "transport": "streamable-http"}
+
+    result = run_agent._normalize_mcp_config("http://server/sse")
+    assert result == {"url": "http://server/sse", "transport": "sse"}
+
+    # Test string format without /sse or /mcp ending (defaults to streamable-http)
+    result = run_agent._normalize_mcp_config("http://server")
+    assert result == {"url": "http://server", "transport": "streamable-http"}
+
+    # Test dict format with explicit transport
+    result = run_agent._normalize_mcp_config({"url": "http://server/mcp", "transport": "sse"})
+    assert result == {"url": "http://server/mcp", "transport": "sse"}
+
+    # Test dict format without transport (auto-detect)
+    result = run_agent._normalize_mcp_config({"url": "http://server/sse"})
+    assert result == {"url": "http://server/sse", "transport": "sse"}
+
+    result = run_agent._normalize_mcp_config({"url": "http://server/mcp"})
+    assert result == {"url": "http://server/mcp", "transport": "streamable-http"}
+
+    # Test invalid dict (missing url)
+    with pytest.raises(ValueError, match="must contain 'url' key"):
+        run_agent._normalize_mcp_config({"transport": "sse"})
+
+    # Test invalid transport type
+    with pytest.raises(ValueError, match="Invalid transport type"):
+        run_agent._normalize_mcp_config({"url": "http://server/mcp", "transport": "stdio"})
+
+    with pytest.raises(ValueError, match="Invalid transport type"):
+        run_agent._normalize_mcp_config({"url": "http://server/mcp", "transport": "invalid"})
+
+    # Test invalid type
+    with pytest.raises(ValueError, match="Invalid MCP host item type"):
+        run_agent._normalize_mcp_config(123)
+
+
 def test_agent_run_thread_handles_internal_exception(basic_agent_run_info, mock_memory_context, monkeypatch):
     """If an internal error occurs, the observer should be notified and a ValueError propagated."""
     # Configure NexentAgent.create_single_agent to raise an exception