
Commit 8edcc4e

Guardrails API - add streaming support (#17400)
* fix(initial-commit): adding a way to get the right response type based on the api route
* feat(unified_guardrail.py): support streaming guardrails
* test: update tests
* fix: fix linting errors
* test: update tests
1 parent 74ba18d commit 8edcc4e

File tree

21 files changed: +1134 -379 lines changed

litellm/batches/main.py

Lines changed: 0 additions & 2 deletions
@@ -18,8 +18,6 @@

 import httpx
 from openai.types.batch import BatchRequestCounts
-from openai.types.batch import Metadata
-from openai.types.batch import Metadata as OpenAIBatchMetadata

 import litellm
 from litellm._logging import verbose_logger

litellm/constants.py

Lines changed: 13 additions & 5 deletions
@@ -262,7 +262,9 @@
262262
QDRANT_SCALAR_QUANTILE = float(os.getenv("QDRANT_SCALAR_QUANTILE", 0.99))
263263
QDRANT_VECTOR_SIZE = int(os.getenv("QDRANT_VECTOR_SIZE", 1536))
264264
CACHED_STREAMING_CHUNK_DELAY = float(os.getenv("CACHED_STREAMING_CHUNK_DELAY", 0.02))
265-
AUDIO_SPEECH_CHUNK_SIZE = 8192 # chunk_size for audio speech streaming. Balance between latency and memory usage
265+
AUDIO_SPEECH_CHUNK_SIZE = int(
266+
os.getenv("AUDIO_SPEECH_CHUNK_SIZE", 8192)
267+
) # chunk_size for audio speech streaming. Balance between latency and memory usage
266268
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = int(
267269
os.getenv("MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB", 512)
268270
)
@@ -285,10 +287,16 @@
285287
MAX_LANGFUSE_INITIALIZED_CLIENTS = int(
286288
os.getenv("MAX_LANGFUSE_INITIALIZED_CLIENTS", 50)
287289
)
288-
LOGGING_WORKER_CONCURRENCY = int(os.getenv("LOGGING_WORKER_CONCURRENCY", 100)) # Must be above 0
290+
LOGGING_WORKER_CONCURRENCY = int(
291+
os.getenv("LOGGING_WORKER_CONCURRENCY", 100)
292+
) # Must be above 0
289293
LOGGING_WORKER_MAX_QUEUE_SIZE = int(os.getenv("LOGGING_WORKER_MAX_QUEUE_SIZE", 50_000))
290-
LOGGING_WORKER_MAX_TIME_PER_COROUTINE = float(os.getenv("LOGGING_WORKER_MAX_TIME_PER_COROUTINE", 20.0))
291-
LOGGING_WORKER_CLEAR_PERCENTAGE = int(os.getenv("LOGGING_WORKER_CLEAR_PERCENTAGE", 50)) # Percentage of queue to clear (default: 50%)
294+
LOGGING_WORKER_MAX_TIME_PER_COROUTINE = float(
295+
os.getenv("LOGGING_WORKER_MAX_TIME_PER_COROUTINE", 20.0)
296+
)
297+
LOGGING_WORKER_CLEAR_PERCENTAGE = int(
298+
os.getenv("LOGGING_WORKER_CLEAR_PERCENTAGE", 50)
299+
) # Percentage of queue to clear (default: 50%)
292300
MAX_ITERATIONS_TO_CLEAR_QUEUE = int(os.getenv("MAX_ITERATIONS_TO_CLEAR_QUEUE", 200))
293301
MAX_TIME_TO_CLEAR_QUEUE = float(os.getenv("MAX_TIME_TO_CLEAR_QUEUE", 5.0))
294302
LOGGING_WORKER_AGGRESSIVE_CLEAR_COOLDOWN_SECONDS = float(
@@ -866,7 +874,7 @@
866874
"deepseek_r1",
867875
"qwen3",
868876
"twelvelabs",
869-
"openai"
877+
"openai",
870878
]
871879

872880
BEDROCK_EMBEDDING_PROVIDERS_LITERAL = Literal[
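
The constants.py change converts AUDIO_SPEECH_CHUNK_SIZE and the logging-worker limits from hard-coded values into environment-variable overrides read at import time. A minimal sketch of how such an override behaves, assuming litellm.constants is imported after the variable is set (the 4096 value is purely illustrative):

import os

# The override must be in place before litellm.constants is imported,
# because os.getenv() is evaluated once at module import time.
os.environ["AUDIO_SPEECH_CHUNK_SIZE"] = "4096"  # illustrative value

from litellm import constants

print(constants.AUDIO_SPEECH_CHUNK_SIZE)  # expected: 4096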

litellm/integrations/custom_guardrail.py

Lines changed: 0 additions & 1 deletion
@@ -20,7 +20,6 @@
     GuardrailEventHooks,
     LitellmParams,
     Mode,
-    PiiEntityType,
 )
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.proxy.guardrails.guardrail_hooks.base import GuardrailConfigModel

litellm/litellm_core_utils/README.md

Lines changed: 1 addition & 0 deletions
@@ -9,4 +9,5 @@ Core files:
 - `default_encoding.py`: code for loading the default encoding (tiktoken)
 - `get_llm_provider_logic.py`: code for inferring the LLM provider from a given model name.
 - `duration_parser.py`: code for parsing durations - e.g. "1d", "1mo", "10s"
+- `api_route_to_call_types.py`: mapping of API routes to their corresponding CallTypes (e.g., `/chat/completions` -> [acompletion, completion])

litellm/litellm_core_utils/api_route_to_call_types.py

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+"""
+Dictionary mapping API routes to their corresponding CallTypes in LiteLLM.
+
+This dictionary maps each API endpoint to the CallTypes that can be used for that route.
+Each route can have both async (prefixed with 'a') and sync call types.
+"""
+
+from litellm.types.utils import API_ROUTE_TO_CALL_TYPES, CallTypes
+
+
+def get_call_types_for_route(route: str) -> list:
+    """
+    Get the list of CallTypes for a given API route.
+
+    Args:
+        route: API route path (e.g., "/chat/completions")
+
+    Returns:
+        List of CallTypes for that route, or empty list if route not found
+    """
+    return API_ROUTE_TO_CALL_TYPES.get(route, [])
+
+
+def get_routes_for_call_type(call_type: CallTypes) -> list:
+    """
+    Get all routes that use a specific CallType.
+
+    Args:
+        call_type: The CallType to search for
+
+    Returns:
+        List of routes that use this CallType
+    """
+    routes = []
+    for route, types in API_ROUTE_TO_CALL_TYPES.items():
+        if call_type in types:
+            routes.append(route)
+    return routes
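
The new helper module gives the proxy a lookup from an API route to the CallTypes that handle it, which is what the commit message describes as getting the right response type based on the api route. A hedged usage sketch, assuming the module lives at litellm.litellm_core_utils.api_route_to_call_types and that API_ROUTE_TO_CALL_TYPES contains the "/chat/completions" mapping mentioned in the README entry above:

from litellm.litellm_core_utils.api_route_to_call_types import (
    get_call_types_for_route,
    get_routes_for_call_type,
)
from litellm.types.utils import CallTypes

# Forward lookup: which call types can serve this route?
call_types = get_call_types_for_route("/chat/completions")
print(call_types)  # e.g. [CallTypes.acompletion, CallTypes.completion]

# Reverse lookup: every route registered against the async completion call type.
routes = get_routes_for_call_type(CallTypes.acompletion)
print(routes)  # e.g. ["/chat/completions", ...]

# Unknown routes fall back to an empty list rather than raising.
print(get_call_types_for_route("/not-a-real-route"))  # []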

litellm/llms/base_llm/guardrail_translation/base_translation.py

Lines changed: 14 additions & 0 deletions
@@ -84,3 +84,17 @@ async def process_output_response(
             user_api_key_dict: User API key metadata (passed separately since response doesn't contain it)
         """
         pass
+
+    async def process_output_streaming_response(
+        self,
+        response: Any,
+        guardrail_to_apply: "CustomGuardrail",
+        litellm_logging_obj: Optional["LiteLLMLoggingObj"] = None,
+        user_api_key_dict: Optional["UserAPIKeyAuth"] = None,
+    ) -> Any:
+        """
+        Process output streaming response with guardrails.
+
+        Optional to override in subclasses.
+        """
+        return response
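
The base class now exposes process_output_streaming_response as an optional hook: by default it returns the streaming chunk unchanged, and provider-specific translations can override it to run guardrails over streamed output. A rough, hypothetical sketch of such an override (MyStreamingTranslation and redact_text are illustrative names, not part of the commit):

from typing import Any, Optional

from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation


async def redact_text(text: str) -> str:
    """Hypothetical stand-in for whatever check a concrete guardrail performs."""
    return text.replace("secret", "[REDACTED]")


class MyStreamingTranslation(BaseTranslation):
    """Illustrative subclass: rewrite the text of each streamed delta."""

    async def process_output_streaming_response(
        self,
        response: Any,
        guardrail_to_apply: Any,
        litellm_logging_obj: Optional[Any] = None,
        user_api_key_dict: Optional[Any] = None,
    ) -> Any:
        # Walk each streamed choice and pass its delta text through the check.
        for choice in getattr(response, "choices", []):
            delta = getattr(choice, "delta", None)
            if delta is not None and isinstance(getattr(delta, "content", None), str):
                delta.content = await redact_text(delta.content)
        return response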

litellm/llms/openai/chat/guardrail_translation/handler.py

Lines changed: 82 additions & 16 deletions
@@ -14,16 +14,16 @@
 This pattern can be replicated for other message formats (e.g., Anthropic).
 """

-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast

 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
-from litellm.types.utils import Choices
+from litellm.types.utils import Choices, StreamingChoices

 if TYPE_CHECKING:
     from litellm.integrations.custom_guardrail import CustomGuardrail
-    from litellm.types.utils import ModelResponse
+    from litellm.types.utils import ModelResponse, ModelResponseStream


 class OpenAIChatCompletionsHandler(BaseTranslation):
@@ -241,21 +241,79 @@ async def process_output_response(

         return response

-    def _has_text_content(self, response: "ModelResponse") -> bool:
+    async def process_output_streaming_response(
+        self,
+        response: "ModelResponseStream",
+        guardrail_to_apply: "CustomGuardrail",
+        litellm_logging_obj: Optional[Any] = None,
+        user_api_key_dict: Optional[Any] = None,
+    ) -> Any:
+        """
+        Process output streaming response by applying guardrails to text content.
+
+        Args:
+            response: LiteLLM ModelResponseStream object
+            guardrail_to_apply: The guardrail instance to apply
+            litellm_logging_obj: Optional logging object
+            user_api_key_dict: User API key metadata to pass to guardrails
+
+        Returns:
+            Modified response with guardrail applied to content
+
+        Response Format Support:
+            - String content: choice.message.content = "text here"
+            - List content: choice.message.content = [{"type": "text", "text": "text here"}, ...]
+        """
+
+        # Step 0: Check if response has any text content to process
+        if not self._has_text_content(response):
+            return response
+
+        texts_to_check: List[str] = []
+        images_to_check: List[str] = []
+        task_mappings: List[Tuple[int, Optional[int]]] = []
+        # Track (choice_index, content_index) for each text
+
+        # Step 1: Extract all text content and images from response choices
+        for choice_idx, choice in enumerate(response.choices):
+
+            self._extract_output_text_and_images(
+                choice=choice,
+                choice_idx=choice_idx,
+                texts_to_check=texts_to_check,
+                images_to_check=images_to_check,
+                task_mappings=task_mappings,
+            )
+
+    def _has_text_content(
+        self, response: Union["ModelResponse", "ModelResponseStream"]
+    ) -> bool:
         """
         Check if response has any text content to process.

         Override this method to customize text content detection.
         """
-        for choice in response.choices:
-            if isinstance(choice, litellm.Choices):
-                if choice.message.content and isinstance(choice.message.content, str):
-                    return True
+        from litellm.types.utils import ModelResponse, ModelResponseStream
+
+        if isinstance(response, ModelResponse):
+            for choice in response.choices:
+                if isinstance(choice, litellm.Choices):
+                    if choice.message.content and isinstance(
+                        choice.message.content, str
+                    ):
+                        return True
+        elif isinstance(response, ModelResponseStream):
+            for choice in response.choices:
+                if isinstance(choice, litellm.Choices):
+                    if choice.message.content and isinstance(
+                        choice.message.content, str
+                    ):
+                        return True
         return False

     def _extract_output_text_and_images(
         self,
-        choice: Any,
+        choice: Union[Choices, StreamingChoices],
         choice_idx: int,
         texts_to_check: List[str],
         images_to_check: List[str],
@@ -266,21 +324,29 @@ def _extract_output_text_and_images(

         Override this method to customize text/image extraction logic.
         """
-        if not isinstance(choice, litellm.Choices):
-            return
-
         verbose_proxy_logger.debug(
             "OpenAI Chat Completions: Processing choice: %s", choice
         )

-        if choice.message.content and isinstance(choice.message.content, str):
+        # Determine content source based on choice type
+        content = None
+        if isinstance(choice, litellm.Choices):
+            content = choice.message.content
+        elif isinstance(choice, litellm.StreamingChoices):
+            content = choice.delta.content
+        else:
+            # Unknown choice type, skip processing
+            return
+
+        # Process content if it exists
+        if content and isinstance(content, str):
             # Simple string content
-            texts_to_check.append(choice.message.content)
+            texts_to_check.append(content)
             task_mappings.append((choice_idx, None))

-        elif choice.message.content and isinstance(choice.message.content, list):
+        elif content and isinstance(content, list):
             # List content (e.g., multimodal response)
-            for content_idx, content_item in enumerate(choice.message.content):
+            for content_idx, content_item in enumerate(content):
                 # Extract text
                 content_text = content_item.get("text")
                 if content_text:
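
The handler change hinges on where the text lives: a full ModelResponse keeps it at choice.message.content, while a streamed chunk keeps it at choice.delta.content, so _extract_output_text_and_images now branches on the choice type. A small sketch of that distinction using the types from litellm.types.utils (the constructor keyword arguments shown are assumptions, not taken from this diff):

from litellm.types.utils import (
    Choices,
    Delta,
    Message,
    ModelResponseStream,
    StreamingChoices,
)

# Non-streaming: the text sits on choice.message.content.
full_choice = Choices(index=0, message=Message(role="assistant", content="final answer"))
print(full_choice.message.content)  # "final answer"

# Streaming: each chunk carries a delta, and the text sits on choice.delta.content.
chunk = ModelResponseStream(
    choices=[StreamingChoices(index=0, delta=Delta(content="partial "))]
)
print(chunk.choices[0].delta.content)  # "partial "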

litellm/model_prices_and_context_window_backup.json

Lines changed: 65 additions & 0 deletions
@@ -269,6 +269,71 @@
         "supports_response_schema": true,
         "supports_vision": true
     },
+    "amazon.nova-2-lite-v1:0": {
+        "input_cost_per_token": 3e-07,
+        "litellm_provider": "bedrock_converse",
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 64000,
+        "max_tokens": 64000,
+        "mode": "chat",
+        "output_cost_per_token": 2.5e-06,
+        "supports_function_calling": true,
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true,
+        "supports_reasoning": true,
+        "supports_response_schema": true,
+        "supports_video_input": true,
+        "supports_vision": true
+    },
+    "apac.amazon.nova-2-lite-v1:0": {
+        "input_cost_per_token": 6e-08,
+        "litellm_provider": "bedrock_converse",
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 64000,
+        "max_tokens": 64000,
+        "mode": "chat",
+        "output_cost_per_token": 2.75e-06,
+        "supports_function_calling": true,
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true,
+        "supports_reasoning": true,
+        "supports_response_schema": true,
+        "supports_video_input": true,
+        "supports_vision": true
+    },
+    "eu.amazon.nova-2-lite-v1:0": {
+        "input_cost_per_token": 6e-08,
+        "litellm_provider": "bedrock_converse",
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 64000,
+        "max_tokens": 64000,
+        "mode": "chat",
+        "output_cost_per_token": 2.75e-06,
+        "supports_function_calling": true,
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true,
+        "supports_reasoning": true,
+        "supports_response_schema": true,
+        "supports_video_input": true,
+        "supports_vision": true
+    },
+    "us.amazon.nova-2-lite-v1:0": {
+        "input_cost_per_token": 6e-08,
+        "litellm_provider": "bedrock_converse",
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 64000,
+        "max_tokens": 64000,
+        "mode": "chat",
+        "output_cost_per_token": 2.75e-06,
+        "supports_function_calling": true,
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true,
+        "supports_reasoning": true,
+        "supports_response_schema": true,
+        "supports_video_input": true,
+        "supports_vision": true
+    },
+
     "amazon.nova-micro-v1:0": {
         "input_cost_per_token": 3.5e-08,
         "litellm_provider": "bedrock_converse",
