1919import os
2020import threading
2121import warnings
22- from typing import TYPE_CHECKING , Any , Dict , List , Optional , Sequence , Union
22+ from typing import TYPE_CHECKING , Any , Dict , List , Literal , Optional , Sequence , Union
2323
2424import requests
2525from lightning_sdk .lightning_cloud .openapi import V1ConversationResponseChunk
@@ -219,6 +219,7 @@ def _model_call(
219219 tools : Optional [Sequence [Union [str , Dict [str , Any ]]]] = None ,
220220 lit_tools : Optional [List [LitTool ]] = None ,
221221 auto_call_tools : bool = False ,
222+ reasoning_effort : Optional [str ] = None ,
222223 ** kwargs : Any ,
223224 ) -> str :
224225 """Handles the model call and logs appropriate messages."""
@@ -238,6 +239,7 @@ def _model_call(
238239 stream = stream ,
239240 full_response = full_response ,
240241 tools = tools ,
242+ reasoning_effort = reasoning_effort ,
241243 ** kwargs ,
242244 )
243245 if tools and isinstance (response , V1ConversationResponseChunk ):
@@ -251,7 +253,8 @@ def context_length(self, model: Optional[str] = None) -> int:
251253 self ._wait_for_model ()
252254 assert self ._llm is not None , "LLM backend must be initialized"
253255 if not model :
254- return self ._llm .context_length
256+ # TODO use metadata
257+ return self ._llm .get_context_length (self ._model )
255258 return self ._llm .get_context_length (model )
256259
257260 def chat ( # noqa: D417
@@ -266,6 +269,7 @@ def chat( # noqa: D417
266269 stream : bool = False ,
267270 tools : Optional [Sequence [Union [LitTool , "StructuredTool" ]]] = None ,
268271 auto_call_tools : bool = False ,
272+ reasoning_effort : Optional [Literal ["low" , "medium" , "high" ]] = None ,
269273 ** kwargs : Any ,
270274 ) -> str :
271275 """Sends a message to the LLM and retrieves a response.
@@ -286,11 +290,14 @@ def chat( # noqa: D417
286290 categorized by conversation ID.
287291 full_response (bool): Whether the entire response should be returned from the chat.
288292 auto_call_tools (bool): Tools will be executed automatically whenever applicable. Defaults to False.
293+ reasoning_effort (Optional[Literal["low", "medium", "high"]]): The level of reasoning effort for the model.
289294 **kwargs (Any): Additional keyword arguments
290295
291296 Returns:
292297 str: The response from the LLM.
293298 """
299+ if reasoning_effort is not None and reasoning_effort not in ["low" , "medium" , "high" ]:
300+ raise ValueError ("reasoning_effort must be 'low', 'medium', 'high', or None" )
294301 self ._wait_for_model ()
295302 lit_tools = LitTool .convert_tools (tools )
296303 processed_tools = [tool .as_tool () for tool in lit_tools ] if lit_tools else None
@@ -314,6 +321,7 @@ def chat( # noqa: D417
314321 tools = processed_tools ,
315322 lit_tools = lit_tools ,
316323 auto_call_tools = auto_call_tools ,
324+ reasoning_effort = reasoning_effort ,
317325 ** kwargs ,
318326 )
319327 except Exception as e :
@@ -336,6 +344,7 @@ def chat( # noqa: D417
336344 tools = processed_tools ,
337345 lit_tools = lit_tools ,
338346 auto_call_tools = auto_call_tools ,
347+ reasoning_effort = reasoning_effort ,
339348 ** kwargs ,
340349 )
341350
0 commit comments