98 changes: 98 additions & 0 deletions schema/mcp-agent.config.schema.json
@@ -701,6 +701,92 @@
"title": "MCPSettings",
"type": "object"
},
"OllamaSettings": {
"additionalProperties": true,
"description": "Settings for using Ollama models in the MCP Agent application.\nInherits from OpenAISettings to maintain compatibility with OpenAI API.",
"properties": {
"api_key": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Api Key"
},
"reasoning_effort": {
"default": "medium",
"enum": [
"low",
"medium",
"high"
],
"title": "Reasoning Effort",
"type": "string"
},
"base_url": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Base Url"
},
"user": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "User"
},
"default_headers": {
"anyOf": [
{
"additionalProperties": {
"type": "string"
},
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Default Headers"
},
"default_model": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Default Model"
},
"think": {
"default": true,
"title": "Think",
"type": "boolean"
}
},
"title": "OllamaSettings",
"type": "object"
},
"OpenAISettings": {
"additionalProperties": true,
"description": "Settings for using OpenAI models in the MCP Agent application.",
@@ -1091,6 +1177,18 @@
"default": null,
"description": "Settings for using OpenAI models in the MCP Agent application"
},
"ollama": {
"anyOf": [
{
"$ref": "#/$defs/OllamaSettings"
},
{
"type": "null"
}
],
"default": null,
"description": "Settings for using Ollama models in the MCP Agent application"
},
"azure": {
"anyOf": [
{
12 changes: 12 additions & 0 deletions src/mcp_agent/config.py
@@ -173,6 +173,15 @@ class OpenAISettings(BaseModel):

model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

class OllamaSettings(OpenAISettings):
"""
Settings for using Ollama models in the MCP Agent application.
Inherits from OpenAISettings to maintain compatibility with OpenAI API.
"""

think: bool = True

model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

class AzureSettings(BaseModel):
"""
@@ -407,6 +416,9 @@ class Settings(BaseSettings):
openai: OpenAISettings | None = None
"""Settings for using OpenAI models in the MCP Agent application"""

ollama: OllamaSettings | None = None
"""Settings for using Ollama models in the MCP Agent application"""

azure: AzureSettings | None = None
"""Settings for using Azure models in the MCP Agent application"""

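For reference, a minimal sketch of configuring these settings programmatically. The endpoint and model name are assumptions: Ollama's OpenAI-compatible API is conventionally served at http://localhost:11434/v1, and the model must already be pulled locally.

from mcp_agent.config import OllamaSettings, Settings

settings = Settings(
    ollama=OllamaSettings(
        base_url="http://localhost:11434/v1",  # assumed local Ollama endpoint
        api_key="ollama",  # placeholder; Ollama does not check the key
        default_model="deepseek-r1",  # hypothetical; any locally pulled model
        think=True,  # enable thinking mode for models that support it
    )
)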
8 changes: 8 additions & 0 deletions src/mcp_agent/workflows/llm/augmented_llm.py
@@ -163,6 +163,12 @@ class RequestParams(CreateMessageRequestParams):
This is used to stably identify the user in the LLM provider's logs.
"""

think: bool = True
"""
Enable thinking/reasoning mode for models that support it (like deepseek-r1 on Ollama).
When enabled, the model will show its reasoning process.
"""


class AugmentedLLMProtocol(Protocol, Generic[MessageParamT, MessageT]):
"""Protocol defining the interface for augmented LLMs"""
@@ -552,6 +558,8 @@ def annotate_span_with_request_params(
span.set_attribute(GEN_AI_REQUEST_TEMPERATURE, request_params.temperature)
if hasattr(request_params, "use_history"):
span.set_attribute("request_params.use_history", request_params.use_history)
if hasattr(request_params, "think"):
span.set_attribute("request_params.think", request_params.think)
if hasattr(request_params, "parallel_tool_calls"):
span.set_attribute(
"request_params.parallel_tool_calls", request_params.parallel_tool_calls
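A quick illustration of the new field (a sketch, not part of this diff; assumes the remaining RequestParams fields keep their defaults):

from mcp_agent.workflows.llm.augmented_llm import RequestParams

# think defaults to True; pass False to disable reasoning mode
# for models that support it (e.g. deepseek-r1 on Ollama).
params = RequestParams(think=False)
assert hasattr(params, "think") and params.think is False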
101 changes: 93 additions & 8 deletions src/mcp_agent/workflows/llm/augmented_llm_ollama.py
@@ -1,18 +1,29 @@
from typing import Type

from typing import Type, Any, List
from pydantic import BaseModel
from openai import OpenAI

from openai.types.chat import ChatCompletionMessageParam, ChatCompletionToolParam
from mcp_agent.config import OllamaSettings
from mcp_agent.executor.workflow_task import workflow_task
from mcp_agent.tracing.token_tracking_decorator import track_tokens
from mcp_agent.utils.pydantic_type_serializer import serialize_model, deserialize_model
from mcp_agent.workflows.llm.augmented_llm import (
ModelT,
RequestParams,
)
from mcp_agent.workflows.llm.augmented_llm_openai import (
OpenAIAugmentedLLM,
RequestStructuredCompletionRequest,
)
from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM

class RequestCompletionRequest(BaseModel):
config: OllamaSettings
payload: dict


class RequestStructuredCompletionRequest(BaseModel):
config: OllamaSettings
response_model: Any | None = None
serialized_response_model: str | None = None
response_str: str
model: str
user: str | None = None

class OllamaAugmentedLLM(OpenAIAugmentedLLM):
"""
@@ -33,6 +44,48 @@ def __init__(self, *args, **kwargs):

self.provider = "Ollama"

def _build_request_arguments(
self,
model: str,
messages: List[ChatCompletionMessageParam],
available_tools: List[ChatCompletionToolParam] | None,
user: str | None,
params: RequestParams,
) -> dict:
"""Build arguments dict for API completion request, adding Ollama-specific think parameter."""
arguments = super()._build_request_arguments(
model=model,
messages=messages,
available_tools=available_tools,
user=user,
params=params,
)

# Add think parameter if specified
if hasattr(params, 'think') and params.think is not None:
arguments["think"] = params.think

return arguments

def _create_completion_request(self, arguments: dict) -> RequestCompletionRequest:
"""Create Ollama-specific RequestCompletionRequest object."""
return RequestCompletionRequest(
config=self.context.config.ollama,
payload=arguments,
)

def _get_completion_task(self):
"""Get the Ollama completion task to use for API calls."""
return OllamaCompletionTasks.request_completion_task

@track_tokens()
async def generate(
self,
message,
request_params: RequestParams | None = None,
):
return await super().generate(message, request_params)

async def generate_structured(
self,
message,
@@ -61,7 +114,7 @@ async def generate_structured(
structured_response = await self.executor.execute(
OllamaCompletionTasks.request_structured_completion_task,
RequestStructuredCompletionRequest(
config=self.context.config.openai,
config=self.context.config.ollama,
response_model=response_model
if not serialized_response_model
else None,
@@ -80,6 +133,38 @@


class OllamaCompletionTasks:
@staticmethod
@workflow_task
async def request_completion_task(
request: RequestCompletionRequest,
):
"""
Request a completion from Ollama's OpenAI-compatible API.
"""
from openai.types.chat import ChatCompletion
from mcp_agent.utils.common import ensure_serializable

openai_client = OpenAI(
api_key=request.config.api_key,
base_url=request.config.base_url,
http_client=request.config.http_client
if hasattr(request.config, "http_client")
else None,
)

payload = request.payload.copy()

# Extract Ollama-specific parameters that the OpenAI client doesn't understand
think = payload.pop('think', None)

# TODO: Investigate how to properly pass 'think' parameter to Ollama
# For now, we'll skip it to avoid the OpenAI client error
# The think parameter might need to be passed differently to Ollama's API

response = openai_client.chat.completions.create(**payload)
Review thread on this line:

ZaneH (Author): Calling this with think in the payload doesn't work. We may need to use Ollama's Python package, or create an HTTP request for a more manual approach.

ZaneH (Author, Aug 15, 2025): Comment out

think = payload.pop('think', None)

to test what happens when it is included. For me it shows:

[mcp_agent.executor.executor] Error executing task: Completions.create() got an unexpected keyword argument 'think'
[mcp_agent.workflows.llm.augmented_llm_openai.daily_briefer] Error: Completions.create() got an unexpected keyword argument 'think'

I believe this is coming from the OpenAI Python package as a sort of validation.

Collaborator: We can exclude think from the request for non-Ollama cases. I would even be inclined to duplicate some more parts of the OpenAI class into Ollama to get it to work cleanly.
response = ensure_serializable(response)
return response

@staticmethod
@workflow_task
async def request_structured_completion_task(
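One option worth testing for the review thread above (an untested sketch, not what this PR does): the openai Python SDK accepts an extra_body argument that injects additional JSON fields into the request body without tripping Completions.create()'s keyword validation. Whether Ollama's OpenAI-compatible endpoint actually reads a top-level think field is an assumption to verify.

# Sketch: route 'think' through extra_body instead of as a raw keyword.
# extra_body is a documented openai-python feature; Ollama honoring
# "think" on its /v1 endpoint is unverified.
think = payload.pop("think", None)
if think is not None:
    payload.setdefault("extra_body", {})["think"] = think
response = openai_client.chat.completions.create(**payload)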