diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json index 5d867fe64..80b1b230d 100644 --- a/frontend/public/locales/en/common.json +++ b/frontend/public/locales/en/common.json @@ -854,7 +854,7 @@ "mcpConfig.modal.updatingTools": "Updating tools list...", "mcpConfig.addServer.title": "Add MCP Server", "mcpConfig.addServer.namePlaceholder": "Server name", - "mcpConfig.addServer.urlPlaceholder": "Server URL (e.g.: http://localhost:3001/sse), currently only SSE protocol supported", + "mcpConfig.addServer.urlPlaceholder": "Server URL (e.g.: http://localhost:3001/mcp), currently supports sse and streamable-http protocols", "mcpConfig.addServer.button.add": "Add", "mcpConfig.addServer.button.updating": "Updating...", "mcpConfig.serverList.title": "Configured MCP Servers", diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json index dded1efb0..77a1b2c2e 100644 --- a/frontend/public/locales/zh/common.json +++ b/frontend/public/locales/zh/common.json @@ -854,7 +854,7 @@ "mcpConfig.modal.updatingTools": "正在更新工具列表...", "mcpConfig.addServer.title": "添加MCP服务器", "mcpConfig.addServer.namePlaceholder": "服务器名称", - "mcpConfig.addServer.urlPlaceholder": "服务器URL (如: http://localhost:3001/sse),目前仅支持sse协议", + "mcpConfig.addServer.urlPlaceholder": "服务器URL (如: http://localhost:3001/mcp),目前支持sse和streamable-http协议", "mcpConfig.addServer.button.add": "添加", "mcpConfig.addServer.button.updating": "更新中...", "mcpConfig.serverList.title": "已配置的MCP服务器", diff --git a/sdk/nexent/core/agents/agent_model.py b/sdk/nexent/core/agents/agent_model.py index 6eff00718..f3c5a77b7 100644 --- a/sdk/nexent/core/agents/agent_model.py +++ b/sdk/nexent/core/agents/agent_model.py @@ -1,7 +1,7 @@ from __future__ import annotations from threading import Event -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, Field @@ -50,7 +50,12 @@ class AgentRunInfo(BaseModel): model_config_list: List[ModelConfig] = Field(description="List of model configurations") observer: MessageObserver = Field(description="Return data") agent_config: AgentConfig = Field(description="Detailed Agent configuration") - mcp_host: Optional[List[str]] = Field(description="MCP server address", default=None) + mcp_host: Optional[List[Union[str, Dict[str, Any]]]] = Field( + description="MCP server address(es). Can be a string (URL) or dict with 'url' and 'transport' keys. " + "Transport can be 'sse' or 'streamable-http'. 
If string, transport is auto-detected based on URL ending: " + "URLs ending with '/sse' use 'sse' transport, URLs ending with '/mcp' use 'streamable-http' transport.", + default=None + ) history: Optional[List[AgentHistory]] = Field(description="Historical conversation information", default=None) stop_event: Event = Field(description="Stop event control") diff --git a/sdk/nexent/core/agents/core_agent.py b/sdk/nexent/core/agents/core_agent.py index 826ef7093..be7b83b5e 100644 --- a/sdk/nexent/core/agents/core_agent.py +++ b/sdk/nexent/core/agents/core_agent.py @@ -1,3 +1,4 @@ +import json import re import ast import time @@ -9,12 +10,13 @@ from rich.console import Group from rich.text import Text -from smolagents.agents import CodeAgent, handle_agent_output_types, AgentError +from smolagents.agents import CodeAgent, handle_agent_output_types, AgentError, ActionOutput, RunResult from smolagents.local_python_executor import fix_final_answer_code from smolagents.memory import ActionStep, PlanningStep, FinalAnswerStep, ToolCall, TaskStep, SystemPromptStep -from smolagents.models import ChatMessage -from smolagents.monitoring import LogLevel -from smolagents.utils import AgentExecutionError, AgentGenerationError, truncate_content +from smolagents.models import ChatMessage, CODEAGENT_RESPONSE_FORMAT +from smolagents.monitoring import LogLevel, Timing, YELLOW_HEX, TokenUsage +from smolagents.utils import AgentExecutionError, AgentGenerationError, truncate_content, AgentMaxStepsError, \ + extract_code_from_text from ..utils.observer import MessageObserver, ProcessType from jinja2 import Template, StrictUndefined @@ -125,13 +127,17 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: # Add new step in logs memory_step.model_input_messages = input_messages + stop_sequences = ["", "Observation:", "Calling tools:", "", "Observation:", "Calling tools:", " Generator[Any]: # Parse try: - code_action = fix_final_answer_code(parse_code_blobs(model_output)) + if self._use_structured_outputs_internally: + code_action = json.loads(model_output)["code"] + code_action = extract_code_from_text(code_action, self.code_block_tags) or code_action + else: + code_action = parse_code_blobs(model_output) + code_action = fix_final_answer_code(code_action) + memory_step.code_action = code_action # Record parsing results self.observer.add_message( self.agent_name, ProcessType.PARSE, code_action) @@ -155,26 +167,29 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: content=model_output, title="AGENT FINAL ANSWER", level=LogLevel.INFO) raise FinalAnswerError() - memory_step.tool_calls = [ - ToolCall(name="python_interpreter", arguments=code_action, id=f"call_{len(self.memory.steps)}", )] + tool_call = ToolCall( + name="python_interpreter", + arguments=code_action, + id=f"call_{len(self.memory.steps)}", + ) + memory_step.tool_calls = [tool_call] # Execute self.logger.log_code(title="Executing parsed code:", content=code_action, level=LogLevel.INFO) - is_final_answer = False try: - output, execution_logs, is_final_answer = self.python_executor( - code_action) - + code_output = self.python_executor(code_action) execution_outputs_console = [] - if len(execution_logs) > 0: + if len(code_output.logs) > 0: # Record execution results self.observer.add_message( - self.agent_name, ProcessType.EXECUTION_LOGS, f"{execution_logs}") + self.agent_name, ProcessType.EXECUTION_LOGS, f"{code_output.logs}") execution_outputs_console += [ - Text("Execution logs:", style="bold"), Text(execution_logs), ] - 
observation = "Execution logs:\n" + execution_logs + Text("Execution logs:", style="bold"), + Text(code_output.logs), + ] + observation = "Execution logs:\n" + code_output.logs except Exception as e: if hasattr(self.python_executor, "state") and "_print_outputs" in self.python_executor.state: execution_logs = str( @@ -196,20 +211,24 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: level=LogLevel.INFO, ) raise AgentExecutionError(error_msg, self.logger) - truncated_output = truncate_content(str(output)) - if output is not None: + truncated_output = None + if code_output is not None and code_output.output is not None: + truncated_output = truncate_content(str(code_output.output)) observation += "Last output from code snippet:\n" + truncated_output memory_step.observations = observation - execution_outputs_console += [ - Text(f"{('Out - Final answer' if is_final_answer else 'Out')}: {truncated_output}", - style=("bold #d4b702" if is_final_answer else ""), ), ] + if not code_output.is_final_answer and truncated_output is not None: + execution_outputs_console += [ + Text( + f"Out: {truncated_output}", + ), + ] self.logger.log(Group(*execution_outputs_console), level=LogLevel.INFO) - memory_step.action_output = output - yield output if is_final_answer else None + memory_step.action_output = code_output.output + yield ActionOutput(output=code_output.output, is_final_answer=code_output.is_final_answer) def run(self, task: str, stream: bool = False, reset: bool = True, images: Optional[List[str]] = None, - additional_args: Optional[Dict] = None, max_steps: Optional[int] = None, ): + additional_args: Optional[Dict] = None, max_steps: Optional[int] = None, return_full_result: bool | None = None): """ Run the agent for the given task. @@ -220,6 +239,8 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio images (`list[str]`, *optional*): Paths to image(s). additional_args (`dict`, *optional*): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names! max_steps (`int`, *optional*): Maximum number of steps the agent can take to solve the task. if not provided, will use the agent's default value. + return_full_result (`bool`, *optional*): Whether to return the full [`RunResult`] object or just the final answer output. + If `None` (default), the agent's `self.return_full_result` setting is used. Example: ```py @@ -236,7 +257,6 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio You have been provided with these additional arguments, that you can access using the keys as variables in your python code: {str(additional_args)}.""" - self.system_prompt = self.initialize_system_prompt() self.memory.system_prompt = SystemPromptStep( system_prompt=self.system_prompt) if reset: @@ -261,8 +281,47 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio if stream: # The steps are returned as they are executed through a generator to iterate on. return self._run_stream(task=self.task, max_steps=max_steps, images=images) + run_start_time = time.time() + steps = list(self._run_stream(task=self.task, max_steps=max_steps, images=images)) + # Outputs are returned only at the end. We only look at the last step. 
- return list(self._run_stream(task=self.task, max_steps=max_steps, images=images))[-1].final_answer + assert isinstance(steps[-1], FinalAnswerStep) + output = steps[-1].output + + return_full_result = return_full_result if return_full_result is not None else self.return_full_result + if return_full_result: + total_input_tokens = 0 + total_output_tokens = 0 + correct_token_usage = True + for step in self.memory.steps: + if isinstance(step, (ActionStep, PlanningStep)): + if step.token_usage is None: + correct_token_usage = False + break + else: + total_input_tokens += step.token_usage.input_tokens + total_output_tokens += step.token_usage.output_tokens + if correct_token_usage: + token_usage = TokenUsage(input_tokens=total_input_tokens, output_tokens=total_output_tokens) + else: + token_usage = None + + if self.memory.steps and isinstance(getattr(self.memory.steps[-1], "error", None), AgentMaxStepsError): + state = "max_steps_error" + else: + state = "success" + + step_dicts = self.memory.get_full_steps() + + return RunResult( + output=output, + token_usage=token_usage, + steps=step_dicts, + timing=Timing(start_time=run_start_time, end_time=time.time()), + state=state, + ) + + return output def __call__(self, task: str, **kwargs): """Adds additional prompting for the managed agent, runs it, and wraps the output. @@ -271,7 +330,11 @@ def __call__(self, task: str, **kwargs): full_task = Template(self.prompt_templates["managed_agent"]["task"], undefined=StrictUndefined).render({ "name": self.name, "task": task, **self.state }) - report = self.run(full_task, **kwargs) + result = self.run(full_task, **kwargs) + if isinstance(result, RunResult): + report = result.output + else: + report = result # When a sub-agent finishes running, return a marker try: @@ -286,7 +349,7 @@ def __call__(self, task: str, **kwargs): if self.provide_run_summary: answer += "\n\nFor more detail, find below a summary of this agent's work:\n\n" for message in self.write_memory_to_messages(summary_mode=True): - content = message["content"] + content = message.content answer += "\n" + truncate_content(str(content)) + "\n---" answer += "\n" return answer @@ -295,28 +358,44 @@ def _run_stream( self, task: str, max_steps: int, images: list["PIL.Image.Image"] | None = None ) -> Generator[ActionStep | PlanningStep | FinalAnswerStep]: final_answer = None + action_step = None self.step_number = 1 - while final_answer is None and self.step_number <= max_steps and not self.stop_event.is_set(): + returned_final_answer = False + while not returned_final_answer and self.step_number <= max_steps and not self.stop_event.is_set(): step_start_time = time.time() action_step = ActionStep( - step_number=self.step_number, start_time=step_start_time, observations_images=images + step_number=self.step_number, timing=Timing(start_time=step_start_time), observations_images=images ) try: - for el in self._execute_step(action_step): - yield el - final_answer = el + for output in self._step_stream(action_step): + yield output + + if isinstance(output, ActionOutput) and output.is_final_answer: + final_answer = output.output + self.logger.log( + Text(f"Final answer: {final_answer}", style=f"bold {YELLOW_HEX}"), + level=LogLevel.INFO, + ) + + if self.final_answer_checks: + self._validate_final_answer(final_answer) + returned_final_answer = True + action_step.is_final_answer = True + except FinalAnswerError: # When the model does not output code, directly treat the large model content as the final answer final_answer = action_step.model_output if 
isinstance(final_answer, str): final_answer = convert_code_format(final_answer) + returned_final_answer = True + action_step.is_final_answer = True except AgentError as e: action_step.error = e finally: - self._finalize_step(action_step, step_start_time) + self._finalize_step(action_step) self.memory.steps.append(action_step) yield action_step self.step_number += 1 @@ -324,8 +403,7 @@ def _run_stream( if self.stop_event.is_set(): final_answer = "" - if final_answer is None and self.step_number == max_steps + 1: - final_answer = self._handle_max_steps_reached( - task, images, step_start_time) + if not returned_final_answer and self.step_number == max_steps + 1: + final_answer = self._handle_max_steps_reached(task) yield action_step yield FinalAnswerStep(handle_agent_output_types(final_answer)) diff --git a/sdk/nexent/core/agents/nexent_agent.py b/sdk/nexent/core/agents/nexent_agent.py index f0f932389..d109e3428 100644 --- a/sdk/nexent/core/agents/nexent_agent.py +++ b/sdk/nexent/core/agents/nexent_agent.py @@ -1,8 +1,9 @@ import re +import time from threading import Event from typing import List -from smolagents import ActionStep, AgentText, TaskStep +from smolagents import ActionStep, AgentText, TaskStep, Timing from smolagents.tools import Tool from ..models.openai_llm import OpenAIModel @@ -195,7 +196,9 @@ def add_history_to_agent(self, history: List[AgentHistory]): # Create task step for user message self.agent.memory.steps.append(TaskStep(task=msg.content)) elif msg.role == 'assistant': - self.agent.memory.steps.append(ActionStep(action_output=msg.content, model_output=msg.content)) + self.agent.memory.steps.append(ActionStep(step_number=len(self.agent.memory.steps) + 1, + timing=Timing(start_time=time.time()), + action_output=msg.content, model_output=msg.content)) def agent_run_with_observer(self, query: str, reset=True): if not isinstance(self.agent, CoreAgent): @@ -214,7 +217,7 @@ def agent_run_with_observer(self, query: str, reset=True): if hasattr(step_log, "error") and step_log.error is not None: observer.add_message("", ProcessType.ERROR, str(step_log.error)) - final_answer = step_log.final_answer # Last log is the run's final_answer + final_answer = step_log.output # Last log is the run's final_answer if isinstance(final_answer, AgentText): final_answer_str = convert_code_format(final_answer.to_string()) diff --git a/sdk/nexent/core/agents/run_agent.py b/sdk/nexent/core/agents/run_agent.py index 41429367a..8a5a67517 100644 --- a/sdk/nexent/core/agents/run_agent.py +++ b/sdk/nexent/core/agents/run_agent.py @@ -1,6 +1,7 @@ import asyncio import logging from threading import Thread +from typing import Any, Dict, Union from smolagents import ToolCollection @@ -13,6 +14,56 @@ monitoring_manager = get_monitoring_manager() +def _detect_transport(url: str) -> str: + """ + Auto-detect MCP transport type based on URL format. + + Args: + url: MCP server URL + + Returns: + Transport type: 'sse' or 'streamable-http' + """ + url_stripped = url.strip() + + # Check URL ending to determine transport type + if url_stripped.endswith("/sse"): + return "sse" + elif url_stripped.endswith("/mcp"): + return "streamable-http" + + # Default to streamable-http for unrecognized formats + return "streamable-http" + + +def _normalize_mcp_config(mcp_host_item: Union[str, Dict[str, Any]]) -> Dict[str, Any]: + """ + Normalize MCP host configuration to a dictionary format. 
+ + Args: + mcp_host_item: Either a string URL or a dict with 'url' and optional 'transport' + + Returns: + Dictionary with 'url' and 'transport' keys + """ + if isinstance(mcp_host_item, str): + url = mcp_host_item + transport = _detect_transport(url) + return {"url": url, "transport": transport} + elif isinstance(mcp_host_item, dict): + url = mcp_host_item.get("url") + if not url: + raise ValueError("MCP host dict must contain 'url' key") + transport = mcp_host_item.get("transport") + if not transport: + transport = _detect_transport(url) + if transport not in ("sse", "streamable-http"): + raise ValueError(f"Invalid transport type: {transport}. Must be 'sse' or 'streamable-http'") + return {"url": url, "transport": transport} + else: + raise ValueError(f"Invalid MCP host item type: {type(mcp_host_item)}. Must be str or dict") + + @monitoring_manager.monitor_endpoint("agent_run_thread", "agent_run_thread") def agent_run_thread(agent_run_info: AgentRunInfo): try: @@ -31,7 +82,8 @@ def agent_run_thread(agent_run_info: AgentRunInfo): else: agent_run_info.observer.add_message( "", ProcessType.AGENT_NEW_RUN, "") - mcp_client_list = [{"url": mcp_url} for mcp_url in mcp_host] + # Normalize MCP host configurations to support both string and dict formats + mcp_client_list = [_normalize_mcp_config(item) for item in mcp_host] with ToolCollection.from_mcp(mcp_client_list, trust_remote_code=True) as tool_collection: nexent = NexentAgent( diff --git a/sdk/nexent/core/models/openai_llm.py b/sdk/nexent/core/models/openai_llm.py index 1a52e2d29..e4cfbc990 100644 --- a/sdk/nexent/core/models/openai_llm.py +++ b/sdk/nexent/core/models/openai_llm.py @@ -46,7 +46,7 @@ def __init__(self, observer: MessageObserver, temperature=0.2, top_p=0.95, @get_monitoring_manager().monitor_llm_call("openai_chat", "chat_completion") def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, tools_to_call_from: Optional[List[Tool]] = None, **kwargs, ) -> ChatMessage: + response_format: dict[str, str] | None = None, tools_to_call_from: Optional[List[Tool]] = None, **kwargs, ) -> ChatMessage: # Get token tracker from decorator (if monitoring is available) token_tracker = kwargs.pop('_token_tracker', None) @@ -63,7 +63,7 @@ def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List completion_kwargs = self._prepare_completion_kwargs( messages=messages, stop_sequences=stop_sequences, - grammar=grammar, tools_to_call_from=tools_to_call_from, model=self.model_id, + response_format=response_format, tools_to_call_from=tools_to_call_from, model=self.model_id, custom_role_conversions=self.custom_role_conversions, convert_images_to_image_urls=True, temperature=self.temperature, top_p=self.top_p, **kwargs, ) diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 453857a1d..1e1369fb7 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -31,14 +31,14 @@ dependencies = [ "rich>=13.9.4", "setuptools>=75.1.0", "websockets>=14.2", - "smolagents[mcp]==1.15.0", + "smolagents[mcp]==1.23.0", "Pillow>=10.0.0", "aiohttp>=3.1.13", "jieba>=0.42.1", "boto3>=1.37.34", "botocore>=1.37.34", "python-multipart>=0.0.20", - "mcpadapt==0.1.9", + "mcpadapt>=0.1.13", "mcp==1.10.1", "tiktoken>=0.5.0", "tavily-python", diff --git a/test/sdk/core/agents/test_core_agent.py b/test/sdk/core/agents/test_core_agent.py index cb6240893..54b725620 100644 --- a/test/sdk/core/agents/test_core_agent.py +++ b/test/sdk/core/agents/test_core_agent.py @@ -1,3 +1,5 @@ +import json 
+ import pytest from unittest.mock import MagicMock, patch from threading import Event @@ -14,22 +16,98 @@ def __init__(self, message): super().__init__(message) +class MockAgentMaxStepsError(Exception): + pass + + # Mock for smolagents and its sub-modules mock_smolagents = MagicMock() -mock_smolagents.ActionStep = MagicMock() -mock_smolagents.TaskStep = MagicMock() -mock_smolagents.SystemPromptStep = MagicMock() mock_smolagents.AgentError = MockAgentError mock_smolagents.handle_agent_output_types = MagicMock( return_value="handled_output") +mock_smolagents.utils.AgentMaxStepsError = MockAgentMaxStepsError + +# Create proper class types for isinstance checks (not MagicMock) +class MockActionStep: + def __init__(self, *args, **kwargs): + self.step_number = kwargs.get('step_number', 1) + self.timing = kwargs.get('timing', None) + self.observations_images = kwargs.get('observations_images', None) + self.model_input_messages = None + self.model_output_message = None + self.model_output = None + self.token_usage = None + self.code_action = None + self.tool_calls = None + self.observations = None + self.action_output = None + self.is_final_answer = False + self.error = None + +class MockTaskStep: + def __init__(self, *args, **kwargs): + self.task = kwargs.get('task', '') + self.task_images = kwargs.get('task_images', None) + +class MockSystemPromptStep: + def __init__(self, *args, **kwargs): + self.system_prompt = kwargs.get('system_prompt', '') + +class MockFinalAnswerStep: + def __init__(self, *args, **kwargs): + # Handle both positional and keyword arguments + if args: + self.output = args[0] + else: + self.output = kwargs.get('output', '') + +class MockPlanningStep: + def __init__(self, *args, **kwargs): + self.token_usage = kwargs.get('token_usage', None) + +class MockActionOutput: + def __init__(self, *args, **kwargs): + self.output = kwargs.get('output', None) + self.is_final_answer = kwargs.get('is_final_answer', False) + +class MockRunResult: + def __init__(self, *args, **kwargs): + self.output = kwargs.get('output', None) + self.token_usage = kwargs.get('token_usage', None) + self.steps = kwargs.get('steps', []) + self.timing = kwargs.get('timing', None) + self.state = kwargs.get('state', 'success') + +class MockCodeOutput: + """Mock object returned by python_executor.""" + def __init__(self, output=None, logs="", is_final_answer=False): + self.output = output + self.logs = logs + self.is_final_answer = is_final_answer + +# Assign proper classes to mock_smolagents +mock_smolagents.ActionStep = MockActionStep +mock_smolagents.TaskStep = MockTaskStep +mock_smolagents.SystemPromptStep = MockSystemPromptStep # Create dummy smolagents sub-modules for sub_mod in ["agents", "memory", "models", "monitoring", "utils", "local_python_executor"]: mock_module = MagicMock() setattr(mock_smolagents, sub_mod, mock_module) +# Assign classes to memory submodule +mock_smolagents.memory.ActionStep = MockActionStep +mock_smolagents.memory.TaskStep = MockTaskStep +mock_smolagents.memory.SystemPromptStep = MockSystemPromptStep +mock_smolagents.memory.FinalAnswerStep = MockFinalAnswerStep +mock_smolagents.memory.PlanningStep = MockPlanningStep +mock_smolagents.memory.ToolCall = MagicMock + +# Assign classes to agents submodule mock_smolagents.agents.CodeAgent = MagicMock +mock_smolagents.agents.ActionOutput = MockActionOutput +mock_smolagents.agents.RunResult = MockRunResult # Provide actual implementations for commonly used utils functions @@ -72,6 +150,23 @@ def mock_truncate_content(content, 
max_length=1000): core_agent_module = sys.modules['sdk.nexent.core.agents.core_agent'] # Override AgentError inside the imported module to ensure it has message attr core_agent_module.AgentError = MockAgentError + core_agent_module.AgentMaxStepsError = MockAgentMaxStepsError + # Override classes to use our mock classes for isinstance checks + core_agent_module.FinalAnswerStep = MockFinalAnswerStep + core_agent_module.ActionStep = MockActionStep + core_agent_module.PlanningStep = MockPlanningStep + core_agent_module.ActionOutput = MockActionOutput + core_agent_module.RunResult = MockRunResult + # Override CodeAgent to be a proper class that can be inherited + class MockCodeAgent: + def __init__(self, prompt_templates=None, *args, **kwargs): + # Accept any arguments but don't require observer + # Store attributes that might be accessed + self.prompt_templates = prompt_templates + # Initialize common attributes that CodeAgent might have + for key, value in kwargs.items(): + setattr(self, key, value) + core_agent_module.CodeAgent = MockCodeAgent CoreAgent = ImportedCoreAgent @@ -103,16 +198,50 @@ def core_agent_instance(mock_observer): agent.stop_event = Event() agent.memory = MagicMock() agent.memory.steps = [] + agent.memory.get_full_steps = MagicMock(return_value=[]) agent.python_executor = MagicMock() + + # Mock logger with all required methods + agent.logger = MagicMock() + agent.logger.log = MagicMock() + agent.logger.log_task = MagicMock() + agent.logger.log_markdown = MagicMock() + agent.logger.log_code = MagicMock() agent.step_number = 1 agent._execute_step = MagicMock() agent._finalize_step = MagicMock() agent._handle_max_steps_reached = MagicMock() + + # Set default attributes that might be needed + agent.max_steps = 5 + agent.state = {} + agent.system_prompt = "test system prompt" + agent.return_full_result = False + agent.provide_run_summary = False + agent.tools = {} + agent.managed_agents = {} + agent.monitor = MagicMock() + agent.monitor.reset = MagicMock() + agent.model = MagicMock() + if hasattr(agent.model, 'model_id'): + agent.model.model_id = "test-model" + agent.code_block_tags = ["```", "```"] + agent._use_structured_outputs_internally = False + agent.final_answer_checks = None # Set to avoid MagicMock creating new CoreAgent instances return agent +@pytest.fixture(autouse=True) +def reset_token_usage_mock(): + """Ensure TokenUsage mock does not leak state between tests.""" + token_usage = getattr(core_agent_module, "TokenUsage", None) + if hasattr(token_usage, "reset_mock"): + token_usage.reset_mock() + yield + + # ---------------------------------------------------------------------------- # Tests for _run method # ---------------------------------------------------------------------------- @@ -123,11 +252,12 @@ def test_run_normal_execution(core_agent_instance): task = "test task" max_steps = 3 - # Mock _execute_step to return a generator that yields final answer - def mock_execute_generator(action_step): - yield "final_answer" + # Mock _step_stream to return a generator that yields ActionOutput with final answer + def mock_step_stream(action_step): + action_output = MockActionOutput(output="final_answer", is_final_answer=True) + yield action_output - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_generator) as mock_execute_step, \ + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream) as mock_step_stream_patch, \ patch.object(core_agent_instance, '_finalize_step') as mock_finalize_step: 
core_agent_instance.step_number = 1 @@ -135,11 +265,11 @@ def mock_execute_generator(action_step): result = list(core_agent_instance._run_stream(task, max_steps)) # Assertions - # _run_stream yields: generator output + action step + final answer step + # _run_stream yields: ActionOutput from _step_stream + action step + final answer step assert len(result) == 3 - assert result[0] == "final_answer" # Generator output - assert isinstance(result[1], MagicMock) # Action step - assert isinstance(result[2], MagicMock) # Final answer step + assert isinstance(result[0], MockActionOutput) # ActionOutput from _step_stream + assert isinstance(result[1], MockActionStep) # Action step + assert isinstance(result[2], MockFinalAnswerStep) # Final answer step def test_run_with_max_steps_reached(core_agent_instance): @@ -148,11 +278,12 @@ def test_run_with_max_steps_reached(core_agent_instance): task = "test task" max_steps = 2 - # Mock _execute_step to return None (no final answer) - def mock_execute_generator(action_step): - yield None + # Mock _step_stream to return ActionOutput without final answer + def mock_step_stream(action_step): + action_output = MockActionOutput(output=None, is_final_answer=False) + yield action_output - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_generator) as mock_execute_step, \ + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream) as mock_step_stream_patch, \ patch.object(core_agent_instance, '_finalize_step') as mock_finalize_step, \ patch.object(core_agent_instance, '_handle_max_steps_reached', return_value="max_steps_reached") as mock_handle_max: @@ -162,18 +293,19 @@ def mock_execute_generator(action_step): result = list(core_agent_instance._run_stream(task, max_steps)) # Assertions - # For 2 steps: (None + action_step) * 2 + final_action_step + final_answer_step = 6 - assert len(result) == 6 - assert result[0] is None # First generator output - assert isinstance(result[1], MagicMock) # First action step - assert result[2] is None # Second generator output - assert isinstance(result[3], MagicMock) # Second action step - # Final action step (from _handle_max_steps_reached) - assert isinstance(result[4], MagicMock) - assert isinstance(result[5], MagicMock) # Final answer step + # For 2 steps: (ActionOutput + action_step) * 2 + final_action_step + final_answer_step = 6 + assert len(result) >= 5 + # First step: ActionOutput + ActionStep + assert isinstance(result[0], MockActionOutput) # First ActionOutput + assert isinstance(result[1], MockActionStep) # First action step + # Second step: ActionOutput + ActionStep + assert isinstance(result[2], MockActionOutput) # Second ActionOutput + assert isinstance(result[3], MockActionStep) # Second action step + # Last should be final answer step + assert isinstance(result[-1], MockFinalAnswerStep) # Final answer step # Verify method calls - assert mock_execute_step.call_count == 2 + assert mock_step_stream_patch.call_count == 2 mock_handle_max.assert_called_once() assert mock_finalize_step.call_count == 2 @@ -184,23 +316,28 @@ def test_run_with_stop_event(core_agent_instance): task = "test task" max_steps = 3 - def mock_execute_generator(action_step): + def mock_step_stream(action_step): core_agent_instance.stop_event.set() - yield None + action_output = MockActionOutput(output=None, is_final_answer=False) + yield action_output + + # Mock handle_agent_output_types to return the input value (identity function) + # This way when final_answer = "", it will be passed 
through + with patch.object(core_agent_module, 'handle_agent_output_types', side_effect=lambda x: x): + # Mock _step_stream to set stop event + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream): + with patch.object(core_agent_instance, '_finalize_step'): + # Execute + result = list(core_agent_instance._run_stream(task, max_steps)) - # Mock _execute_step to set stop event - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_generator): - with patch.object(core_agent_instance, '_finalize_step'): - # Execute - result = list(core_agent_instance._run_stream(task, max_steps)) - - # Assertions - # Should yield: generator output + action step + final answer step - assert len(result) == 3 - assert result[0] is None # Generator output - assert isinstance(result[1], MagicMock) # Action step - # Final answer step with "" - assert isinstance(result[2], MagicMock) + # Assertions + # Should yield: ActionOutput from _step_stream + action step + final answer step + assert len(result) == 3 + assert isinstance(result[0], MockActionOutput) # ActionOutput from _step_stream + assert isinstance(result[1], MockActionStep) # Action step + # Final answer step with "" + assert isinstance(result[2], MockFinalAnswerStep) + assert result[2].output == "" def test_run_with_final_answer_error(core_agent_instance): @@ -209,9 +346,9 @@ def test_run_with_final_answer_error(core_agent_instance): task = "test task" max_steps = 3 - # Mock _execute_step to raise FinalAnswerError - with patch.object(core_agent_instance, '_execute_step', - side_effect=core_agent_module.FinalAnswerError()) as mock_execute_step, \ + # Mock _step_stream to raise FinalAnswerError + with patch.object(core_agent_instance, '_step_stream', + side_effect=core_agent_module.FinalAnswerError()) as mock_step_stream, \ patch.object(core_agent_instance, '_finalize_step'): # Execute result = list(core_agent_instance._run_stream(task, max_steps)) @@ -219,8 +356,8 @@ def test_run_with_final_answer_error(core_agent_instance): # Assertions # When FinalAnswerError occurs, it should yield action step + final answer step assert len(result) == 2 - assert isinstance(result[0], MagicMock) # Action step - assert isinstance(result[1], MagicMock) # Final answer step + assert isinstance(result[0], MockActionStep) # Action step + assert isinstance(result[1], MockFinalAnswerStep) # Final answer step def test_run_with_final_answer_error_and_model_output(core_agent_instance): @@ -229,16 +366,12 @@ def test_run_with_final_answer_error_and_model_output(core_agent_instance): task = "test task" max_steps = 3 - # Create a mock action step with model_output - mock_action_step = MagicMock() - mock_action_step.model_output = "```\nprint('hello')\n```" - - # Mock _execute_step to set model_output and then raise FinalAnswerError - def mock_execute_step(action_step): + # Mock _step_stream to set model_output and then raise FinalAnswerError + def mock_step_stream(action_step): action_step.model_output = "```\nprint('hello')\n```" raise core_agent_module.FinalAnswerError() - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_step), \ + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream), \ patch.object(core_agent_module, 'convert_code_format', return_value="```python\nprint('hello')\n```") as mock_convert, \ patch.object(core_agent_instance, '_finalize_step'): # Execute @@ -246,8 +379,8 @@ def mock_execute_step(action_step): # Assertions assert len(result) == 2 - 
assert isinstance(result[0], MagicMock) # Action step - assert isinstance(result[1], MagicMock) # Final answer step + assert isinstance(result[0], MockActionStep) # Action step + assert isinstance(result[1], MockFinalAnswerStep) # Final answer step # Verify convert_code_format was called mock_convert.assert_called_once_with( "```\nprint('hello')\n```") @@ -259,9 +392,9 @@ def test_run_with_agent_error_updated(core_agent_instance): task = "test task" max_steps = 3 - # Mock _execute_step to raise AgentError - with patch.object(core_agent_instance, '_execute_step', - side_effect=MockAgentError("test error")) as mock_execute_step, \ + # Mock _step_stream to raise AgentError + with patch.object(core_agent_instance, '_step_stream', + side_effect=MockAgentError("test error")) as mock_step_stream, \ patch.object(core_agent_instance, '_finalize_step'): # Execute result = list(core_agent_instance._run_stream(task, max_steps)) @@ -270,9 +403,9 @@ def test_run_with_agent_error_updated(core_agent_instance): # When AgentError occurs, it should yield action step + final answer step # But the error causes the loop to continue, so we get multiple action steps assert len(result) >= 2 - assert isinstance(result[0], MagicMock) # Action step with error + assert isinstance(result[0], MockActionStep) # Action step with error # Last item should be final answer step - assert isinstance(result[-1], MagicMock) # Final answer step + assert isinstance(result[-1], MockFinalAnswerStep) # Final answer step def test_run_with_agent_parse_error_branch_updated(core_agent_instance): @@ -280,25 +413,40 @@ def test_run_with_agent_parse_error_branch_updated(core_agent_instance): task = "parse task" max_steps = 1 - # Mock _execute_step to set model_output and then raise FinalAnswerError - def mock_execute_step(action_step): + # Mock _step_stream to set model_output and then raise FinalAnswerError + def mock_step_stream(action_step): action_step.model_output = "```\nprint('hello')\n```" raise core_agent_module.FinalAnswerError() - with patch.object(core_agent_instance, '_execute_step', side_effect=mock_execute_step), \ + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream), \ patch.object(core_agent_module, 'convert_code_format', return_value="```python\nprint('hello')\n```") as mock_convert, \ patch.object(core_agent_instance, '_finalize_step'): results = list(core_agent_instance._run_stream(task, max_steps)) # _run should yield action step + final answer step assert len(results) == 2 - assert isinstance(results[0], MagicMock) # Action step - assert isinstance(results[1], MagicMock) # Final answer step + assert isinstance(results[0], MockActionStep) # Action step + assert isinstance(results[1], MockFinalAnswerStep) # Final answer step # Verify convert_code_format was called mock_convert.assert_called_once_with( "```\nprint('hello')\n```") +def test_run_stream_validates_final_answer_when_checks_enabled(core_agent_instance): + """Ensure _run_stream triggers final answer validation when checks are configured.""" + task = "validate task" + core_agent_instance.final_answer_checks = ["non-empty"] + core_agent_instance._validate_final_answer = MagicMock() + + def mock_step_stream(action_step): + yield MockActionOutput(output="final answer", is_final_answer=True) + + with patch.object(core_agent_instance, '_step_stream', side_effect=mock_step_stream), \ + patch.object(core_agent_instance, '_finalize_step'): + result = list(core_agent_instance._run_stream(task, max_steps=1)) + + assert len(result) == 3 # 
ActionOutput, ActionStep, FinalAnswerStep + core_agent_instance._validate_final_answer.assert_called_once_with("final answer") def test_convert_code_format_display_replacements(): """Validate convert_code_format correctly transforms format to standard markdown.""" @@ -575,6 +723,10 @@ def test_step_stream_parse_success(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -586,7 +738,7 @@ def test_step_stream_parse_success(core_agent_instance): return_value=[]) core_agent_instance.model = MagicMock(return_value=mock_chat_message) core_agent_instance.python_executor = MagicMock( - return_value=("output", "logs", False)) + return_value=MockCodeOutput(output="output", logs="logs", is_final_answer=False)) # Execute list(core_agent_instance._step_stream(mock_memory_step)) @@ -599,6 +751,33 @@ def test_step_stream_parse_success(core_agent_instance): assert hasattr(mock_memory_step.tool_calls[0], 'arguments') +def test_step_stream_structured_outputs_with_stop_sequence(core_agent_instance): + """Ensure _step_stream handles structured outputs correctly.""" + mock_memory_step = MagicMock() + mock_chat_message = MagicMock() + mock_chat_message.content = json.dumps({"code": "print('hello')"}) + mock_chat_message.token_usage = MagicMock() + + core_agent_instance.agent_name = "test_agent" + core_agent_instance.step_number = 1 + core_agent_instance._use_structured_outputs_internally = True + core_agent_instance.code_block_tags = ["<>", "[CLOSE]"] + core_agent_instance.write_memory_to_messages = MagicMock(return_value=[]) + core_agent_instance.model = MagicMock(return_value=mock_chat_message) + core_agent_instance.python_executor = MagicMock( + return_value=MockCodeOutput(output="result", logs="", is_final_answer=False) + ) + + with patch.object(core_agent_module, 'extract_code_from_text', return_value="print('hello')") as mock_extract, \ + patch.object(core_agent_module, 'fix_final_answer_code', side_effect=lambda code: code): + list(core_agent_instance._step_stream(mock_memory_step)) + + # Ensure structured output helpers were used + mock_extract.assert_called_once_with("print('hello')", core_agent_instance.code_block_tags) + call_kwargs = core_agent_instance.model.call_args.kwargs + assert call_kwargs["response_format"] == core_agent_module.CODEAGENT_RESPONSE_FORMAT + + def test_step_stream_skips_execution_for_display_only(core_agent_instance): """Test that _step_stream raises FinalAnswerError when only DISPLAY code blocks are present.""" # Setup @@ -611,6 +790,10 @@ def test_step_stream_skips_execution_for_display_only(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -637,6 +820,10 @@ def test_step_stream_parse_failure_raises_final_answer_error(core_agent_instance core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = 
MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -662,6 +849,10 @@ def test_step_stream_model_generation_error(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -687,6 +878,10 @@ def test_step_stream_execution_success(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -698,14 +893,16 @@ def test_step_stream_execution_success(core_agent_instance): return_value=[]) core_agent_instance.model = MagicMock(return_value=mock_chat_message) core_agent_instance.python_executor = MagicMock( - return_value=("Hello World", "Execution logs", False)) + return_value=MockCodeOutput(output="Hello World", logs="Execution logs", is_final_answer=False)) # Execute result = list(core_agent_instance._step_stream(mock_memory_step)) # Assertions - # Should yield None when is_final_answer is False - assert result[0] is None + # Should yield ActionOutput when is_final_answer is False + assert len(result) == 1 + assert isinstance(result[0], MockActionOutput) + assert result[0].is_final_answer is False assert mock_memory_step.observations is not None # Check that observations was set (we can't easily test the exact content due to mock behavior) assert hasattr(mock_memory_step, 'observations') @@ -723,6 +920,10 @@ def test_step_stream_execution_final_answer(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -734,13 +935,16 @@ def test_step_stream_execution_final_answer(core_agent_instance): return_value=[]) core_agent_instance.model = MagicMock(return_value=mock_chat_message) core_agent_instance.python_executor = MagicMock( - return_value=("final answer", "Execution logs", True)) + return_value=MockCodeOutput(output="final answer", logs="Execution logs", is_final_answer=True)) # Execute result = list(core_agent_instance._step_stream(mock_memory_step)) # Assertions - assert result[0] == "final answer" # Should yield the final answer + assert len(result) == 1 + assert isinstance(result[0], MockActionOutput) + assert result[0].is_final_answer is True + assert result[0].output == "final answer" def test_step_stream_execution_error(core_agent_instance): @@ -755,6 +959,10 @@ def test_step_stream_execution_error(core_agent_instance): core_agent_instance.step_number = 1 
core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -795,6 +1003,10 @@ def test_step_stream_observer_calls(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -806,7 +1018,7 @@ def test_step_stream_observer_calls(core_agent_instance): return_value=[]) core_agent_instance.model = MagicMock(return_value=mock_chat_message) core_agent_instance.python_executor = MagicMock( - return_value=("test", "logs", False)) + return_value=MockCodeOutput(output="test", logs="logs", is_final_answer=False)) # Execute list(core_agent_instance._step_stream(mock_memory_step)) @@ -847,6 +1059,10 @@ def test_step_stream_execution_with_logs(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -859,14 +1075,16 @@ def test_step_stream_execution_with_logs(core_agent_instance): core_agent_instance.model = MagicMock(return_value=mock_chat_message) # Mock python_executor to return logs core_agent_instance.python_executor = MagicMock( - return_value=("output", "Some execution logs", False)) + return_value=MockCodeOutput(output="output", logs="Some execution logs", is_final_answer=False)) # Execute result = list(core_agent_instance._step_stream(mock_memory_step)) # Assertions - # Should yield None when is_final_answer is False - assert result[0] is None + # Should yield ActionOutput when is_final_answer is False + assert len(result) == 1 + assert isinstance(result[0], MockActionOutput) + assert result[0].is_final_answer is False # Check that execution logs were recorded assert core_agent_instance.observer.add_message.call_count >= 3 calls = core_agent_instance.observer.add_message.call_args_list @@ -887,6 +1105,10 @@ def test_step_stream_execution_error_with_print_outputs(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -926,6 +1148,10 @@ def test_step_stream_execution_error_with_import_warning(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = 
MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -969,6 +1195,10 @@ def test_step_stream_execution_error_without_print_outputs(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -1003,6 +1233,10 @@ def test_step_stream_execution_with_none_output(core_agent_instance): core_agent_instance.step_number = 1 core_agent_instance.grammar = None core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.memory = MagicMock() core_agent_instance.memory.steps = [] @@ -1015,14 +1249,16 @@ def test_step_stream_execution_with_none_output(core_agent_instance): core_agent_instance.model = MagicMock(return_value=mock_chat_message) # Mock python_executor to return None output core_agent_instance.python_executor = MagicMock( - return_value=(None, "Execution logs", False)) + return_value=MockCodeOutput(output=None, logs="Execution logs", is_final_answer=False)) # Execute result = list(core_agent_instance._step_stream(mock_memory_step)) # Assertions - # Should yield None when is_final_answer is False - assert result[0] is None + # Should yield ActionOutput when is_final_answer is False + assert len(result) == 1 + assert isinstance(result[0], MockActionOutput) + assert result[0].is_final_answer is False assert mock_memory_step.observations is not None # Check that observations was set but should not contain "Last output from code snippet" # since output is None @@ -1050,6 +1286,10 @@ def test_run_with_additional_args(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1059,8 +1299,7 @@ def test_run_with_additional_args(core_agent_instance): core_agent_instance.observer = MagicMock() # Mock _run_stream to return a simple result - mock_final_step = MagicMock() - mock_final_step.final_answer = "final result" + mock_final_step = MockFinalAnswerStep(output="final result") with patch.object(core_agent_instance, '_run_stream', return_value=[mock_final_step]): # Execute @@ -1089,6 +1328,10 @@ def test_run_with_stream_true(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1123,6 +1366,10 @@ def 
test_run_with_reset_false(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1132,8 +1379,7 @@ def test_run_with_reset_false(core_agent_instance): core_agent_instance.observer = MagicMock() # Mock _run_stream to return a simple result - mock_final_step = MagicMock() - mock_final_step.final_answer = "final result" + mock_final_step = MockFinalAnswerStep(output="final result") with patch.object(core_agent_instance, '_run_stream', return_value=[mock_final_step]): # Execute @@ -1162,6 +1408,10 @@ def test_run_with_images(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1171,8 +1421,7 @@ def test_run_with_images(core_agent_instance): core_agent_instance.observer = MagicMock() # Mock _run_stream to return a simple result - mock_final_step = MagicMock() - mock_final_step.final_answer = "final result" + mock_final_step = MockFinalAnswerStep(output="final result") with patch.object(core_agent_instance, '_run_stream', return_value=[mock_final_step]): # Execute @@ -1185,8 +1434,89 @@ def test_run_with_images(core_agent_instance): call_args = core_agent_instance.memory.steps.append.call_args[0][0] # The TaskStep is mocked, so just verify it was called with correct arguments via the constructor # We'll check that TaskStep was called with the right parameters - mock_smolagents.memory.TaskStep.assert_called_with( - task=task, task_images=images) + assert isinstance(call_args, MockTaskStep) + assert call_args.task == task + assert call_args.task_images == images + + +def test_run_return_full_result_success_state(core_agent_instance): + """run should return RunResult with aggregated token usage when requested.""" + task = "test task" + token_usage = MagicMock(input_tokens=7, output_tokens=3) + action_step = core_agent_module.ActionStep() + action_step.token_usage = token_usage + + core_agent_instance.name = "test_agent" + core_agent_instance.memory.steps = [action_step] + core_agent_instance.memory.get_full_steps = MagicMock(return_value=[{"step": "data"}]) + core_agent_instance.memory.reset = MagicMock() + core_agent_instance.monitor.reset = MagicMock() + core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() + core_agent_instance.model = MagicMock() + core_agent_instance.model.model_id = "model" + core_agent_instance.python_executor = MagicMock() + core_agent_instance.python_executor.send_variables = MagicMock() + core_agent_instance.python_executor.send_tools = MagicMock() + core_agent_instance.observer = MagicMock() + + final_step = 
MockFinalAnswerStep(output="final result") + with patch.object(core_agent_instance, '_run_stream', return_value=[final_step]): + result = core_agent_instance.run(task, return_full_result=True) + + assert isinstance(result, core_agent_module.RunResult) + assert result.output == "final result" + core_agent_module.TokenUsage.assert_called_once_with(input_tokens=7, output_tokens=3) + assert result.token_usage == core_agent_module.TokenUsage.return_value + assert result.state == "success" + core_agent_instance.memory.get_full_steps.assert_called_once() + + +def test_run_return_full_result_max_steps_error(core_agent_instance): + """run should mark state as max_steps_error when the last step contains AgentMaxStepsError.""" + task = "test task" + + action_step = core_agent_module.ActionStep() + action_step.token_usage = None + action_step.error = core_agent_module.AgentMaxStepsError("max steps reached") + + class StepsList(list): + def append(self, item): + # Skip storing TaskStep to keep action_step as the last element + if isinstance(item, core_agent_module.TaskStep): + return + super().append(item) + + core_agent_instance.name = "test_agent" + steps_list = StepsList([action_step]) + core_agent_instance.memory.steps = steps_list + core_agent_instance.memory.get_full_steps = MagicMock(return_value=[{"step": "data"}]) + core_agent_instance.memory.reset = MagicMock() + core_agent_instance.monitor.reset = MagicMock() + core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() + core_agent_instance.model = MagicMock() + core_agent_instance.model.model_id = "model" + core_agent_instance.python_executor = MagicMock() + core_agent_instance.python_executor.send_variables = MagicMock() + core_agent_instance.python_executor.send_tools = MagicMock() + core_agent_instance.observer = MagicMock() + + final_step = MockFinalAnswerStep(output="final result") + with patch.object(core_agent_instance, '_run_stream', return_value=[final_step]): + result = core_agent_instance.run(task, return_full_result=True) + + assert isinstance(result, core_agent_module.RunResult) + assert result.token_usage is None + core_agent_module.TokenUsage.assert_not_called() + assert result.state == "max_steps_error" + core_agent_instance.memory.get_full_steps.assert_called_once() def test_run_without_python_executor(core_agent_instance): @@ -1204,6 +1534,10 @@ def test_run_without_python_executor(core_agent_instance): core_agent_instance.monitor = MagicMock() core_agent_instance.monitor.reset = MagicMock() core_agent_instance.logger = MagicMock() + core_agent_instance.logger.log = MagicMock() + core_agent_instance.logger.log_task = MagicMock() + core_agent_instance.logger.log_markdown = MagicMock() + core_agent_instance.logger.log_code = MagicMock() core_agent_instance.model = MagicMock() core_agent_instance.model.model_id = "test-model" core_agent_instance.name = "test_agent" @@ -1213,8 +1547,7 @@ def test_run_without_python_executor(core_agent_instance): core_agent_instance.observer = MagicMock() # Mock _run_stream to return a simple result - mock_final_step = MagicMock() - mock_final_step.final_answer = "final result" + mock_final_step = MockFinalAnswerStep(output="final result") with patch.object(core_agent_instance, '_run_stream', return_value=[mock_final_step]): # Execute @@ -1267,6 +1600,31 @@ def test_call_method_success(core_agent_instance): 
"test_agent", ProcessType.AGENT_FINISH, "test result") +def test_call_method_with_run_result_return(core_agent_instance): + """Test __call__ handles RunResult by extracting its output.""" + task = "test task" + core_agent_instance.name = "test_agent" + core_agent_instance.state = {} + core_agent_instance.prompt_templates = { + "managed_agent": { + "task": "Task: {{task}}", + "report": "Report: {{final_answer}}" + } + } + core_agent_instance.provide_run_summary = False + core_agent_instance.observer = MagicMock() + + run_result = core_agent_module.RunResult(output="run result", token_usage=None, steps=[], timing=None, state="success") + with patch.object(core_agent_instance, 'run', return_value=run_result) as mock_run: + result = core_agent_instance(task) + + assert "Report: run result" in result + mock_run.assert_called_once() + core_agent_instance.observer.add_message.assert_called_with( + "test_agent", ProcessType.AGENT_FINISH, "run result" + ) + + def test_call_method_with_run_summary(core_agent_instance): """Test __call__ method with provide_run_summary=True.""" # Setup @@ -1284,10 +1642,14 @@ def test_call_method_with_run_summary(core_agent_instance): core_agent_instance.provide_run_summary = True core_agent_instance.observer = MagicMock() - # Mock write_memory_to_messages to return some simple messages + # Mock write_memory_to_messages to return some simple messages with .content attribute + class MockMessage: + def __init__(self, content): + self.content = content + mock_messages = [ - {"content": "msg1"}, - {"content": "msg2"} + MockMessage("msg1"), + MockMessage("msg2") ] core_agent_instance.write_memory_to_messages = MagicMock( return_value=mock_messages) diff --git a/test/sdk/core/agents/test_nexent_agent.py b/test/sdk/core/agents/test_nexent_agent.py index 3dc831323..2a842ea72 100644 --- a/test/sdk/core/agents/test_nexent_agent.py +++ b/test/sdk/core/agents/test_nexent_agent.py @@ -27,11 +27,16 @@ class _ActionStep: - pass + def __init__(self, step_number=None, timing=None, action_output=None, model_output=None): + self.step_number = step_number + self.timing = timing + self.action_output = action_output + self.model_output = model_output class _TaskStep: - pass + def __init__(self, task=None): + self.task = task class _AgentText: @@ -214,6 +219,8 @@ class _MockToolSign: "nexent.storage": mock_nexent_storage_module, "nexent.multi_modal": mock_nexent_multi_modal_module, "nexent.multi_modal.load_save_object": mock_nexent_load_save_module, + # Mock tiktoken to avoid importing the real package when models import it + "tiktoken": MagicMock(), # Mock the OpenAIModel import "sdk.nexent.core.models.openai_llm": MagicMock(OpenAIModel=mock_openai_model_class), # Mock CoreAgent import @@ -230,7 +237,7 @@ class _MockToolSign: from sdk.nexent.core.utils.observer import MessageObserver, ProcessType from sdk.nexent.core.agents import nexent_agent from sdk.nexent.core.agents.nexent_agent import NexentAgent, ActionStep, TaskStep - from sdk.nexent.core.agents.agent_model import ToolConfig, ModelConfig, AgentConfig + from sdk.nexent.core.agents.agent_model import ToolConfig, ModelConfig, AgentConfig, AgentHistory # ---------------------------------------------------------------------------- @@ -1087,6 +1094,48 @@ def test_add_history_to_agent_none_history(nexent_agent_instance, mock_core_agen assert len(mock_core_agent.memory.steps) == 0 +def test_add_history_to_agent_user_and_assistant_history(nexent_agent_instance, mock_core_agent): + """Test add_history_to_agent correctly converts user and 
diff --git a/test/sdk/core/agents/test_nexent_agent.py b/test/sdk/core/agents/test_nexent_agent.py
index 3dc831323..2a842ea72 100644
--- a/test/sdk/core/agents/test_nexent_agent.py
+++ b/test/sdk/core/agents/test_nexent_agent.py
@@ -27,11 +27,16 @@
 class _ActionStep:
-    pass
+    def __init__(self, step_number=None, timing=None, action_output=None, model_output=None):
+        self.step_number = step_number
+        self.timing = timing
+        self.action_output = action_output
+        self.model_output = model_output


 class _TaskStep:
-    pass
+    def __init__(self, task=None):
+        self.task = task


 class _AgentText:
@@ -214,6 +219,8 @@ class _MockToolSign:
     "nexent.storage": mock_nexent_storage_module,
     "nexent.multi_modal": mock_nexent_multi_modal_module,
     "nexent.multi_modal.load_save_object": mock_nexent_load_save_module,
+    # Mock tiktoken to avoid importing the real package when models import it
+    "tiktoken": MagicMock(),
     # Mock the OpenAIModel import
     "sdk.nexent.core.models.openai_llm": MagicMock(OpenAIModel=mock_openai_model_class),
     # Mock CoreAgent import
@@ -230,7 +237,7 @@ class _MockToolSign:
     from sdk.nexent.core.utils.observer import MessageObserver, ProcessType
     from sdk.nexent.core.agents import nexent_agent
     from sdk.nexent.core.agents.nexent_agent import NexentAgent, ActionStep, TaskStep
-    from sdk.nexent.core.agents.agent_model import ToolConfig, ModelConfig, AgentConfig
+    from sdk.nexent.core.agents.agent_model import ToolConfig, ModelConfig, AgentConfig, AgentHistory


 # ----------------------------------------------------------------------------
@@ -1087,6 +1094,48 @@ def test_add_history_to_agent_none_history(nexent_agent_instance, mock_core_agen
     assert len(mock_core_agent.memory.steps) == 0


+def test_add_history_to_agent_user_and_assistant_history(nexent_agent_instance, mock_core_agent):
+    """Test add_history_to_agent correctly converts user and assistant messages to memory steps."""
+    nexent_agent_instance.agent = mock_core_agent
+
+    user_msg = AgentHistory(role="user", content="User question")
+    assistant_msg = AgentHistory(role="assistant", content="Assistant reply")
+
+    nexent_agent_instance.add_history_to_agent([user_msg, assistant_msg])
+
+    mock_core_agent.memory.reset.assert_called_once()
+    assert len(mock_core_agent.memory.steps) == 2
+
+    # First step should be a TaskStep for the user message
+    first_step = mock_core_agent.memory.steps[0]
+    assert isinstance(first_step, TaskStep)
+    assert first_step.task == "User question"
+
+    # Second step should be an ActionStep for the assistant message
+    second_step = mock_core_agent.memory.steps[1]
+    assert isinstance(second_step, ActionStep)
+    assert second_step.action_output == "Assistant reply"
+    assert second_step.model_output == "Assistant reply"
+
+
+def test_add_history_to_agent_invalid_agent_type(nexent_agent_instance):
+    """Test add_history_to_agent raises TypeError when agent is not a CoreAgent."""
+    nexent_agent_instance.agent = "not_core_agent"
+
+    with pytest.raises(TypeError, match="agent must be a CoreAgent object"):
+        nexent_agent_instance.add_history_to_agent([])
+
+
+def test_add_history_to_agent_invalid_history_items(nexent_agent_instance, mock_core_agent):
+    """Test add_history_to_agent raises TypeError when history items are not AgentHistory."""
+    nexent_agent_instance.agent = mock_core_agent
+
+    invalid_history = [{"role": "user", "content": "hello"}]
+
+    with pytest.raises(TypeError, match="history must be a list of AgentHistory objects"):
+        nexent_agent_instance.add_history_to_agent(invalid_history)
+
+
 def test_agent_run_with_observer_success_with_agent_text(nexent_agent_instance, mock_core_agent):
     """Test successful agent_run_with_observer with AgentText final answer."""
     # Setup
@@ -1103,7 +1152,7 @@ def test_agent_run_with_observer_success_with_agent_text(nexent_agent_instance,
         "Final answer with thinking content")

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = mock_final_answer
+    mock_core_agent.run.return_value[-1].output = mock_final_answer

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
@@ -1129,7 +1178,7 @@ def test_agent_run_with_observer_success_with_string_final_answer(nexent_agent_i
     mock_action_step.error = None

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "String final answer with thinking"
+    mock_core_agent.run.return_value[-1].output = "String final answer with thinking"

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
@@ -1153,7 +1202,7 @@ def test_agent_run_with_observer_with_error_in_step(nexent_agent_instance, mock_
     mock_action_step.error = "Test error occurred"

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "Final answer"
+    mock_core_agent.run.return_value[-1].output = "Final answer"

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
@@ -1176,7 +1225,7 @@ def test_agent_run_with_observer_skips_non_action_step(nexent_agent_instance, mo
     mock_action_step.error = None

     mock_core_agent.run.return_value = [mock_task_step, mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "Final answer"
+    mock_core_agent.run.return_value[-1].output = "Final answer"

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
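The add_history_to_agent tests earlier in this hunk fix the conversion contract: a user turn becomes a TaskStep, an assistant turn becomes an ActionStep whose action_output and model_output both hold the reply, and a non-CoreAgent agent or non-AgentHistory items raise TypeError. A condensed sketch of a method that satisfies those assertions; step numbering and any extra bookkeeping in the real nexent_agent.py are simplified away:

def add_history_to_agent(self, history):
    # Sketch only: mirrors the expectations encoded in the tests above.
    if not isinstance(self.agent, CoreAgent):
        raise TypeError("agent must be a CoreAgent object")
    if not history:
        return
    if not all(isinstance(item, AgentHistory) for item in history):
        raise TypeError("history must be a list of AgentHistory objects")

    self.agent.memory.reset()
    for item in history:
        if item.role == "user":
            self.agent.memory.steps.append(TaskStep(task=item.content))
        elif item.role == "assistant":
            self.agent.memory.steps.append(
                ActionStep(step_number=-1,  # placeholder; the real code tracks step numbers
                           action_output=item.content,
                           model_output=item.content))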
@@ -1199,7 +1248,7 @@ def test_agent_run_with_observer_with_stop_event_set(nexent_agent_instance, mock
     mock_action_step.error = None

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "Final answer"
+    mock_core_agent.run.return_value[-1].output = "Final answer"

     # Execute
     nexent_agent_instance.agent_run_with_observer("test query")
@@ -1226,6 +1275,14 @@ def test_agent_run_with_observer_with_exception(nexent_agent_instance, mock_core
     )
+
+def test_agent_run_with_observer_invalid_agent_type(nexent_agent_instance):
+    """Test agent_run_with_observer raises TypeError when agent is not a CoreAgent."""
+    nexent_agent_instance.agent = "not_core_agent"
+
+    with pytest.raises(TypeError, match="agent must be a CoreAgent object"):
+        nexent_agent_instance.agent_run_with_observer("test query")
+
+
 def test_agent_run_with_observer_with_reset_false(nexent_agent_instance, mock_core_agent):
     """Test agent_run_with_observer with reset=False parameter."""
     # Setup
@@ -1238,7 +1295,7 @@ def test_agent_run_with_observer_with_reset_false(nexent_agent_instance, mock_co
     mock_action_step.error = None

     mock_core_agent.run.return_value = [mock_action_step]
-    mock_core_agent.run.return_value[-1].final_answer = "Final answer"
+    mock_core_agent.run.return_value[-1].output = "Final answer"

     # Execute with reset=False
     nexent_agent_instance.agent_run_with_observer("test query", reset=False)
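Taken together, the agent_run_with_observer tests assume a loop of roughly this shape: validate the agent type, stream steps from run, skip anything that is not an ActionStep, note per-step errors, stop early when the stop event is set, and read the final answer from the last step's .output (an AgentText or a plain string). The sketch below is illustrative only; the observer message types and exact signatures are not shown in this diff:

def agent_run_with_observer(self, query, reset=True):
    # Sketch of the flow implied by the tests; error and final-answer reporting through
    # self.observer is elided because its message types do not appear in this diff.
    if not isinstance(self.agent, CoreAgent):
        raise TypeError("agent must be a CoreAgent object")

    last_step = None
    step_errors = []
    for step in self.agent.run(query, stream=True, reset=reset):
        last_step = step
        if self.stop_event.is_set():
            break  # caller requested a stop; keep whatever has been produced so far
        if not isinstance(step, ActionStep):
            continue  # task/planning steps are not reported individually
        if step.error:
            step_errors.append(step.error)  # forwarded through self.observer in the real code

    final_answer = getattr(last_step, "output", None)
    text = final_answer.to_string() if isinstance(final_answer, AgentText) else str(final_answer)
    return text  # the real method pushes this through self.observer instead of returning it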
if _sub == "agents": - for _name in ["CodeAgent", "populate_template", "handle_agent_output_types", "AgentError", "AgentType"]: + for _name in ["CodeAgent", "populate_template", "handle_agent_output_types", "AgentError", "AgentType", "ActionOutput", "RunResult"]: setattr(sub_mod, _name, MagicMock(name=f"smolagents.agents.{_name}")) elif _sub == "local_python_executor": setattr(sub_mod, "fix_final_answer_code", MagicMock(name="fix_final_answer_code")) @@ -59,6 +59,7 @@ def from_mcp(cls, *args, **kwargs): # pylint: disable=unused-argument elif _sub == "models": setattr(sub_mod, "ChatMessage", MagicMock(name="smolagents.models.ChatMessage")) setattr(sub_mod, "MessageRole", MagicMock(name="smolagents.models.MessageRole")) + setattr(sub_mod, "CODEAGENT_RESPONSE_FORMAT", MagicMock(name="smolagents.models.CODEAGENT_RESPONSE_FORMAT")) # Provide a simple base class so that OpenAIModel can inherit from it class _DummyOpenAIServerModel: def __init__(self, *args, **kwargs): @@ -67,13 +68,18 @@ def __init__(self, *args, **kwargs): setattr(sub_mod, "OpenAIServerModel", _DummyOpenAIServerModel) elif _sub == "monitoring": setattr(sub_mod, "LogLevel", MagicMock(name="smolagents.monitoring.LogLevel")) + setattr(sub_mod, "Timing", MagicMock(name="smolagents.monitoring.Timing")) + setattr(sub_mod, "YELLOW_HEX", MagicMock(name="smolagents.monitoring.YELLOW_HEX")) + setattr(sub_mod, "TokenUsage", MagicMock(name="smolagents.monitoring.TokenUsage")) elif _sub == "utils": for _name in [ "AgentExecutionError", "AgentGenerationError", "AgentParsingError", + "AgentMaxStepsError", "parse_code_blobs", "truncate_content", + "extract_code_from_text", ]: setattr(sub_mod, _name, MagicMock(name=f"smolagents.utils.{_name}")) setattr(mock_smolagents, _sub, sub_mod) @@ -82,6 +88,8 @@ def __init__(self, *args, **kwargs): # Top-level exports expected directly from `smolagents` by nexent_agent.py for _name in ["ActionStep", "TaskStep", "AgentText", "handle_agent_output_types"]: setattr(mock_smolagents, _name, MagicMock(name=f"smolagents.{_name}")) +# Export Timing from monitoring submodule to top-level +setattr(mock_smolagents, "Timing", mock_smolagents.monitoring.Timing) # Also export Tool at top-level so that `from smolagents import Tool` works setattr(mock_smolagents, "Tool", mock_smolagents_tool_cls) @@ -237,9 +245,9 @@ def test_agent_run_thread_local_flow(basic_agent_run_info, monkeypatch): def test_agent_run_thread_mcp_flow(basic_agent_run_info, mock_memory_context, monkeypatch): - """Verify behaviour when an MCP host list is provided.""" - # Give the AgentRunInfo an MCP host list - basic_agent_run_info.mcp_host = ["http://mcp.server"] + """Verify behaviour when an MCP host list is provided with auto-detected transport.""" + # Give the AgentRunInfo an MCP host list (string format, auto-detect transport) + basic_agent_run_info.mcp_host = ["http://mcp.server/mcp"] # Prepare ToolCollection.from_mcp to return a context manager mock_tool_collection = MagicMock(name="ToolCollectionInstance") @@ -257,7 +265,7 @@ def test_agent_run_thread_mcp_flow(basic_agent_run_info, mock_memory_context, mo basic_agent_run_info.observer.add_message.assert_any_call("", ProcessType.AGENT_NEW_RUN, "") # ToolCollection.from_mcp should be called with the expected client list and trust_remote_code=True - expected_client_list = [{"url": "http://mcp.server"}] + expected_client_list = [{"url": "http://mcp.server/mcp", "transport": "streamable-http"}] run_agent.ToolCollection.from_mcp.assert_called_once_with(expected_client_list, trust_remote_code=True) # 
@@ -275,6 +283,116 @@ def test_agent_run_thread_mcp_flow(basic_agent_run_info, mock_memory_context, mo
     mock_nexent_instance.agent_run_with_observer.assert_called_once_with(query=basic_agent_run_info.query, reset=False)
+
+def test_agent_run_thread_mcp_flow_with_explicit_transport(basic_agent_run_info, mock_memory_context, monkeypatch):
+    """Verify behaviour when MCP host is provided with explicit transport in dict format."""
+    # Give the AgentRunInfo an MCP host list with explicit transport
+    basic_agent_run_info.mcp_host = [{"url": "http://mcp.server", "transport": "sse"}]
+
+    # Prepare ToolCollection.from_mcp to return a context manager
+    mock_tool_collection = MagicMock(name="ToolCollectionInstance")
+    mock_context_manager = MagicMock(__enter__=MagicMock(return_value=mock_tool_collection), __exit__=MagicMock(return_value=None))
+    monkeypatch.setattr(run_agent.ToolCollection, "from_mcp", MagicMock(return_value=mock_context_manager))
+
+    # Patch NexentAgent
+    mock_nexent_instance = MagicMock(name="NexentAgentInstance")
+    monkeypatch.setattr(run_agent, "NexentAgent", MagicMock(return_value=mock_nexent_instance))
+
+    # Execute
+    run_agent.agent_run_thread(basic_agent_run_info)
+
+    # ToolCollection.from_mcp should be called with the expected client list
+    expected_client_list = [{"url": "http://mcp.server", "transport": "sse"}]
+    run_agent.ToolCollection.from_mcp.assert_called_once_with(expected_client_list, trust_remote_code=True)
+
+
+def test_agent_run_thread_mcp_flow_mixed_formats(basic_agent_run_info, mock_memory_context, monkeypatch):
+    """Verify behaviour when MCP host list contains both string and dict formats."""
+    # Mix of string (auto-detect) and dict (explicit) formats
+    basic_agent_run_info.mcp_host = [
+        "http://mcp1.server/mcp",  # Auto-detect: streamable-http
+        "http://mcp2.server/sse",  # Auto-detect: sse
+        {"url": "http://mcp3.server/mcp", "transport": "streamable-http"},  # Explicit: streamable-http
+    ]
+
+    # Prepare ToolCollection.from_mcp to return a context manager
+    mock_tool_collection = MagicMock(name="ToolCollectionInstance")
+    mock_context_manager = MagicMock(__enter__=MagicMock(return_value=mock_tool_collection), __exit__=MagicMock(return_value=None))
+    monkeypatch.setattr(run_agent.ToolCollection, "from_mcp", MagicMock(return_value=mock_context_manager))
+
+    # Patch NexentAgent
+    mock_nexent_instance = MagicMock(name="NexentAgentInstance")
+    monkeypatch.setattr(run_agent, "NexentAgent", MagicMock(return_value=mock_nexent_instance))
+
+    # Execute
+    run_agent.agent_run_thread(basic_agent_run_info)
+
+    # ToolCollection.from_mcp should be called with normalized client list
+    expected_client_list = [
+        {"url": "http://mcp1.server/mcp", "transport": "streamable-http"},
+        {"url": "http://mcp2.server/sse", "transport": "sse"},
+        {"url": "http://mcp3.server/mcp", "transport": "streamable-http"},
+    ]
+    run_agent.ToolCollection.from_mcp.assert_called_once_with(expected_client_list, trust_remote_code=True)
+
+
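The test_detect_transport and test_normalize_mcp_config cases that follow spell out the helper contracts: URLs ending in /sse select the sse transport, everything else (including /mcp) falls back to streamable-http, dicts must carry a "url" key, and only sse/streamable-http are accepted. A sketch that satisfies those assertions; the real helpers in run_agent.py may be written differently:

from typing import Any, Dict, Union

_VALID_TRANSPORTS = {"sse", "streamable-http"}


def _detect_transport(url: str) -> str:
    # /sse endpoints use SSE; /mcp and anything else default to streamable-http.
    return "sse" if url.rstrip("/").endswith("/sse") else "streamable-http"


def _normalize_mcp_config(item: Union[str, Dict[str, Any]]) -> Dict[str, str]:
    # Strings get an auto-detected transport; dicts are validated and completed.
    if isinstance(item, str):
        return {"url": item, "transport": _detect_transport(item)}
    if isinstance(item, dict):
        if "url" not in item:
            raise ValueError("MCP host dict must contain 'url' key")
        transport = item.get("transport") or _detect_transport(item["url"])
        if transport not in _VALID_TRANSPORTS:
            raise ValueError(f"Invalid transport type: {transport}")
        return {"url": item["url"], "transport": transport}
    raise ValueError(f"Invalid MCP host item type: {type(item)!r}")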
+def test_detect_transport():
+    """Test transport auto-detection logic based on URL ending."""
+    # Test URLs ending with /sse
+    assert run_agent._detect_transport("http://server/sse") == "sse"
+    assert run_agent._detect_transport("https://api.example.com/sse") == "sse"
+    assert run_agent._detect_transport("http://localhost:3000/sse") == "sse"
+
+    # Test URLs ending with /mcp
+    assert run_agent._detect_transport("http://server/mcp") == "streamable-http"
+    assert run_agent._detect_transport("https://api.example.com/mcp") == "streamable-http"
+    assert run_agent._detect_transport("http://localhost:3000/mcp") == "streamable-http"
+
+    # Test default fallback (no /sse or /mcp ending)
+    assert run_agent._detect_transport("http://server") == "streamable-http"
+    assert run_agent._detect_transport("https://api.example.com") == "streamable-http"
+    assert run_agent._detect_transport("http://server/other") == "streamable-http"
+
+
+def test_normalize_mcp_config():
+    """Test MCP configuration normalization."""
+    # Test string format (auto-detect based on URL ending)
+    result = run_agent._normalize_mcp_config("http://server/mcp")
+    assert result == {"url": "http://server/mcp", "transport": "streamable-http"}
+
+    result = run_agent._normalize_mcp_config("http://server/sse")
+    assert result == {"url": "http://server/sse", "transport": "sse"}
+
+    # Test string format without /sse or /mcp ending (defaults to streamable-http)
+    result = run_agent._normalize_mcp_config("http://server")
+    assert result == {"url": "http://server", "transport": "streamable-http"}
+
+    # Test dict format with explicit transport
+    result = run_agent._normalize_mcp_config({"url": "http://server/mcp", "transport": "sse"})
+    assert result == {"url": "http://server/mcp", "transport": "sse"}
+
+    # Test dict format without transport (auto-detect)
+    result = run_agent._normalize_mcp_config({"url": "http://server/sse"})
+    assert result == {"url": "http://server/sse", "transport": "sse"}
+
+    result = run_agent._normalize_mcp_config({"url": "http://server/mcp"})
+    assert result == {"url": "http://server/mcp", "transport": "streamable-http"}
+
+    # Test invalid dict (missing url)
+    with pytest.raises(ValueError, match="must contain 'url' key"):
+        run_agent._normalize_mcp_config({"transport": "sse"})
+
+    # Test invalid transport type
+    with pytest.raises(ValueError, match="Invalid transport type"):
+        run_agent._normalize_mcp_config({"url": "http://server/mcp", "transport": "stdio"})
+
+    with pytest.raises(ValueError, match="Invalid transport type"):
+        run_agent._normalize_mcp_config({"url": "http://server/mcp", "transport": "invalid"})
+
+    # Test invalid type
+    with pytest.raises(ValueError, match="Invalid MCP host item type"):
+        run_agent._normalize_mcp_config(123)
+
+
 def test_agent_run_thread_handles_internal_exception(basic_agent_run_info, mock_memory_context, monkeypatch):
     """If an internal error occurs, the observer should be notified and a ValueError propagated."""
     # Configure NexentAgent.create_single_agent to raise an exception