feat: add reasoning effort (#62)

k223kim · web-flow · commit 2f8f9ae6b6fa · 2025-09-11T18:32:44.000+01:00
* fix: update context_length

* feat: add reasoning effort

* chore: raise an error when an invalid value is given for reasoning effort

* fix: sdk version

* nit: context_length
diff --git a/requirements.txt b/requirements.txt
@@ -1 +1 @@
-lightning_sdk >= 2025.09.10
+lightning_sdk >= 2025.09.11
diff --git a/src/litai/llm.py b/src/litai/llm.py
@@ -19,7 +19,7 @@
 import os
 import threading
 import warnings
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Union
 
 import requests
 from lightning_sdk.lightning_cloud.openapi import V1ConversationResponseChunk
@@ -219,6 +219,7 @@ def _model_call(
         tools: Optional[Sequence[Union[str, Dict[str, Any]]]] = None,
         lit_tools: Optional[List[LitTool]] = None,
         auto_call_tools: bool = False,
+        reasoning_effort: Optional[str] = None,
         **kwargs: Any,
     ) -> str:
         """Handles the model call and logs appropriate messages."""
@@ -238,6 +239,7 @@ def _model_call(
             stream=stream,
             full_response=full_response,
             tools=tools,
+            reasoning_effort=reasoning_effort,
             **kwargs,
         )
         if tools and isinstance(response, V1ConversationResponseChunk):
@@ -251,7 +253,8 @@ def context_length(self, model: Optional[str] = None) -> int:
         self._wait_for_model()
         assert self._llm is not None, "LLM backend must be initialized"
         if not model:
-            return self._llm.context_length
+            # TODO use metadata
+            return self._llm.get_context_length(self._model)
         return self._llm.get_context_length(model)
 
     def chat(  # noqa: D417
@@ -266,6 +269,7 @@ def chat(  # noqa: D417
         stream: bool = False,
         tools: Optional[Sequence[Union[LitTool, "StructuredTool"]]] = None,
         auto_call_tools: bool = False,
+        reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
         **kwargs: Any,
     ) -> str:
         """Sends a message to the LLM and retrieves a response.
@@ -286,11 +290,14 @@ def chat(  # noqa: D417
             categorized by conversation ID.
             full_response (bool): Whether the entire response should be returned from the chat.
             auto_call_tools (bool): Tools will be executed automatically whenever applicable. Defaults to False.
+            reasoning_effort (Optional[Literal["low", "medium", "high"]]): The level of reasoning effort for the model.
             **kwargs (Any): Additional keyword arguments
 
         Returns:
             str: The response from the LLM.
         """
+        if reasoning_effort is not None and reasoning_effort not in ["low", "medium", "high"]:
+            raise ValueError("reasoning_effort must be 'low', 'medium', 'high', or None")
         self._wait_for_model()
         lit_tools = LitTool.convert_tools(tools)
         processed_tools = [tool.as_tool() for tool in lit_tools] if lit_tools else None
@@ -314,6 +321,7 @@ def chat(  # noqa: D417
                     tools=processed_tools,
                     lit_tools=lit_tools,
                     auto_call_tools=auto_call_tools,
+                    reasoning_effort=reasoning_effort,
                     **kwargs,
                 )
             except Exception as e:
@@ -336,6 +344,7 @@ def chat(  # noqa: D417
                         tools=processed_tools,
                         lit_tools=lit_tools,
                         auto_call_tools=auto_call_tools,
+                        reasoning_effort=reasoning_effort,
                         **kwargs,
                     )
 
diff --git a/tests/test_llm.py b/tests/test_llm.py
@@ -92,7 +92,7 @@ def test_llm_context_length(mock_llm_class):
 
     LLMCLIENT._sdkllm_cache.clear()
     mock_llm_instance = MagicMock()
-    mock_llm_instance.context_length = 8000
+    mock_llm_instance.get_context_length.return_value = 8000
 
     mock_llm_class.return_value = mock_llm_instance
 
@@ -134,6 +134,7 @@ def test_llm_chat(mock_llm_class):
         full_response=False,
         my_kwarg="test-kwarg",
         tools=None,
+        reasoning_effort=None,
     )
     test_kwargs = mock_llm_instance.chat.call_args.kwargs
     assert test_kwargs.get("my_kwarg") == "test-kwarg"
@@ -190,6 +191,7 @@ def mock_llm_constructor(name, teamspace="default-teamspace", **kwargs):
         stream=False,
         full_response=True,
         tools=None,
+        reasoning_effort=None,
     )
 
 
@@ -240,6 +242,7 @@ def mock_llm_constructor(name, teamspace="default-teamspace", **kwargs):
         stream=False,
         full_response=False,
         tools=None,
+        reasoning_effort=None,
     )
 
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1 @@` |
| `1` | | `-lightning_sdk >= 2025.09.10` |
| | `1` | `+lightning_sdk >= 2025.09.11` |