Commit bef00d1

fix: correct none reasoning mapping for small gemini models (#65)

* allow to pass none as reasoning effort
* bump sdk version
* fix: None maps to none on small gemini models
* fix rebase
* fix test

1 parent a370729 commit bef00d1

File tree

2 files changed (+53, -3 lines)

src/litai/llm.py

Lines changed: 9 additions & 2 deletions

```diff
@@ -41,6 +41,9 @@
     "google/gemini-2.5-flash",
 }
 
+# these models support reasoning_effort='none'
+NONE_REASONING_MODELS = ["google/gemini-2.0-flash", "google/gemini-2.5-flash-lite-preview-06-17"]
+
 logger = logging.getLogger(__name__)
 
 
@@ -290,15 +293,19 @@ def chat(  # noqa: D417
                 categorized by conversation ID.
             full_response (bool): Whether the entire response should be returned from the chat.
             auto_call_tools (bool): Tools will be executed automatically whenever applicable. Defaults to False.
-            reasoning_effort (Optional[Literal["none", "low", "medium", "high"]]):
-                The level of reasoning effort for the model.
+            reasoning_effort (Optional[Literal["low", "medium", "high"]]): The level of reasoning effort for the model.
             **kwargs (Any): Additional keyword arguments
 
         Returns:
             str: The response from the LLM.
         """
         if reasoning_effort is not None and reasoning_effort not in ["none", "low", "medium", "high"]:
             raise ValueError("reasoning_effort must be 'low', 'medium', 'high', or None")
+        if reasoning_effort is None and (
+            model in NONE_REASONING_MODELS or (self._model in NONE_REASONING_MODELS and model is None)
+        ):
+            reasoning_effort = "none"
+
         self._wait_for_model()
         lit_tools = LitTool.convert_tools(tools)
         processed_tools = [tool.as_tool() for tool in lit_tools] if lit_tools else None
```
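
To make the new fallback concrete, here is a minimal standalone sketch of the logic this hunk adds. The helper `resolve_reasoning_effort` and its signature are hypothetical (the commit inlines this logic in `chat()`); `default_model` stands in for `self._model` and `model` is the optional per-call override:

```python
from typing import Literal, Optional

# Mirrors the constant added in src/litai/llm.py.
NONE_REASONING_MODELS = ["google/gemini-2.0-flash", "google/gemini-2.5-flash-lite-preview-06-17"]


def resolve_reasoning_effort(
    reasoning_effort: Optional[Literal["none", "low", "medium", "high"]],
    default_model: str,
    model: Optional[str] = None,
) -> Optional[str]:
    """Hypothetical standalone version of the fallback inlined in chat()."""
    if reasoning_effort is not None and reasoning_effort not in ["none", "low", "medium", "high"]:
        raise ValueError("reasoning_effort must be 'low', 'medium', 'high', or None")
    # None is remapped to the explicit "none" level only when the effective
    # model is one of the small Gemini models that accept it.
    if reasoning_effort is None and (
        model in NONE_REASONING_MODELS or (default_model in NONE_REASONING_MODELS and model is None)
    ):
        return "none"
    return reasoning_effort


assert resolve_reasoning_effort(None, "google/gemini-2.0-flash") == "none"
assert resolve_reasoning_effort(None, "openai/gpt-4") is None
assert resolve_reasoning_effort("low", "google/gemini-2.0-flash") == "low"
```

Note the precedence in the condition: a per-call `model` outside the list suppresses the remap even when the client's default model is listed, since the `default_model` branch only fires when `model is None`.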

tests/test_llm.py

Lines changed: 44 additions & 1 deletion

```diff
@@ -119,7 +119,50 @@ def test_llm_chat(mock_llm_class):
         system_prompt="You are a helpful assistant.",
         metadata={"user_api": "123456"},
         my_kwarg="test-kwarg",
-        reasoning_effort="none",
+        reasoning_effort=None,
+    )
+
+    assert isinstance(response, str)
+    assert "helpful" in response.lower()
+    mock_llm_instance.chat.assert_called_once_with(
+        prompt="Hello, who are you?",
+        system_prompt="You are a helpful assistant.",
+        max_completion_tokens=None,
+        images=None,
+        conversation=None,
+        metadata={"user_api": "123456"},
+        stream=False,
+        full_response=False,
+        my_kwarg="test-kwarg",
+        tools=None,
+        reasoning_effort=None,
+    )
+    test_kwargs = mock_llm_instance.chat.call_args.kwargs
+    assert test_kwargs.get("my_kwarg") == "test-kwarg"
+
+    llm.reset_conversation("test")
+    mock_llm_instance.reset_conversation.assert_called_once()
+
+
+@patch("litai.llm.SDKLLM")
+def test_reasoning_effort_override(mock_llm_class):
+    """Test LightningLLM chat."""
+    from litai.llm import LLM as LLMCLIENT
+
+    LLMCLIENT._sdkllm_cache.clear()
+    mock_llm_instance = MagicMock()
+    mock_llm_instance.chat.return_value = "Hello! I am a helpful assistant."
+
+    mock_llm_class.return_value = mock_llm_instance
+
+    llm = LLM(model="google/gemini-2.0-flash")
+
+    response = llm.chat(
+        "Hello, who are you?",
+        system_prompt="You are a helpful assistant.",
+        metadata={"user_api": "123456"},
+        my_kwarg="test-kwarg",
+        reasoning_effort=None,
     )
 
     assert isinstance(response, str)
```
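
Taken together, the two tests pin down the intended behavior from the caller's side. A minimal usage sketch of that behavior, assuming `litai.llm.LLM` as imported in the tests:

```python
from litai.llm import LLM

# google/gemini-2.0-flash is listed in NONE_REASONING_MODELS, so leaving
# reasoning_effort unset (None) is remapped to the explicit "none" level
# before the call is forwarded to the underlying SDK model.
llm = LLM(model="google/gemini-2.0-flash")
response = llm.chat("Hello, who are you?", reasoning_effort=None)

# For models outside NONE_REASONING_MODELS, None is forwarded unchanged,
# which is what test_llm_chat's assert_called_once_with(...) checks above.
print(response)
```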
