Skip to content

Commit 2f8f9ae

Browse files
authored
feat: add reasoning effort (#62)
* fix: update context_length
* feat: add reasoning effort
* chore: throw error when invalid keys are given for reasoning effort
* fix: sdk version
* nit: context_length
1 parent 685eb8b commit 2f8f9ae

File tree

3 files changed

+16
-4
lines changed

3 files changed

+16
-4
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
lightning_sdk >= 2025.09.10
1+
lightning_sdk >= 2025.09.11

src/litai/llm.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import os
2020
import threading
2121
import warnings
22-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
22+
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Union
2323

2424
import requests
2525
from lightning_sdk.lightning_cloud.openapi import V1ConversationResponseChunk
@@ -219,6 +219,7 @@ def _model_call(
219219
tools: Optional[Sequence[Union[str, Dict[str, Any]]]] = None,
220220
lit_tools: Optional[List[LitTool]] = None,
221221
auto_call_tools: bool = False,
222+
reasoning_effort: Optional[str] = None,
222223
**kwargs: Any,
223224
) -> str:
224225
"""Handles the model call and logs appropriate messages."""
@@ -238,6 +239,7 @@ def _model_call(
238239
stream=stream,
239240
full_response=full_response,
240241
tools=tools,
242+
reasoning_effort=reasoning_effort,
241243
**kwargs,
242244
)
243245
if tools and isinstance(response, V1ConversationResponseChunk):
@@ -251,7 +253,8 @@ def context_length(self, model: Optional[str] = None) -> int:
251253
self._wait_for_model()
252254
assert self._llm is not None, "LLM backend must be initialized"
253255
if not model:
254-
return self._llm.context_length
256+
# TODO use metadata
257+
return self._llm.get_context_length(self._model)
255258
return self._llm.get_context_length(model)
256259

257260
def chat( # noqa: D417
@@ -266,6 +269,7 @@ def chat( # noqa: D417
266269
stream: bool = False,
267270
tools: Optional[Sequence[Union[LitTool, "StructuredTool"]]] = None,
268271
auto_call_tools: bool = False,
272+
reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
269273
**kwargs: Any,
270274
) -> str:
271275
"""Sends a message to the LLM and retrieves a response.
@@ -286,11 +290,14 @@ def chat( # noqa: D417
286290
categorized by conversation ID.
287291
full_response (bool): Whether the entire response should be returned from the chat.
288292
auto_call_tools (bool): Tools will be executed automatically whenever applicable. Defaults to False.
293+
reasoning_effort (Optional[Literal["low", "medium", "high"]]): The level of reasoning effort for the model.
289294
**kwargs (Any): Additional keyword arguments
290295
291296
Returns:
292297
str: The response from the LLM.
293298
"""
299+
if reasoning_effort is not None and reasoning_effort not in ["low", "medium", "high"]:
300+
raise ValueError("reasoning_effort must be 'low', 'medium', 'high', or None")
294301
self._wait_for_model()
295302
lit_tools = LitTool.convert_tools(tools)
296303
processed_tools = [tool.as_tool() for tool in lit_tools] if lit_tools else None
@@ -314,6 +321,7 @@ def chat( # noqa: D417
314321
tools=processed_tools,
315322
lit_tools=lit_tools,
316323
auto_call_tools=auto_call_tools,
324+
reasoning_effort=reasoning_effort,
317325
**kwargs,
318326
)
319327
except Exception as e:
@@ -336,6 +344,7 @@ def chat( # noqa: D417
336344
tools=processed_tools,
337345
lit_tools=lit_tools,
338346
auto_call_tools=auto_call_tools,
347+
reasoning_effort=reasoning_effort,
339348
**kwargs,
340349
)
341350

tests/test_llm.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ def test_llm_context_length(mock_llm_class):
9292

9393
LLMCLIENT._sdkllm_cache.clear()
9494
mock_llm_instance = MagicMock()
95-
mock_llm_instance.context_length = 8000
95+
mock_llm_instance.get_context_length.return_value = 8000
9696

9797
mock_llm_class.return_value = mock_llm_instance
9898

@@ -134,6 +134,7 @@ def test_llm_chat(mock_llm_class):
134134
full_response=False,
135135
my_kwarg="test-kwarg",
136136
tools=None,
137+
reasoning_effort=None,
137138
)
138139
test_kwargs = mock_llm_instance.chat.call_args.kwargs
139140
assert test_kwargs.get("my_kwarg") == "test-kwarg"
@@ -190,6 +191,7 @@ def mock_llm_constructor(name, teamspace="default-teamspace", **kwargs):
190191
stream=False,
191192
full_response=True,
192193
tools=None,
194+
reasoning_effort=None,
193195
)
194196

195197

@@ -240,6 +242,7 @@ def mock_llm_constructor(name, teamspace="default-teamspace", **kwargs):
240242
stream=False,
241243
full_response=False,
242244
tools=None,
245+
reasoning_effort=None,
243246
)
244247

245248

0 commit comments

Comments (0)