From 831ad45c4df36eeb401d718157ae391151e693ab Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Tue, 2 Dec 2025 18:18:08 +0530 Subject: [PATCH] Add ragflow support --- docs/my-website/docs/providers/ragflow.md | 244 ++++++++++++ docs/my-website/sidebars.js | 1 + litellm/__init__.py | 1 + litellm/constants.py | 1 + .../get_llm_provider_logic.py | 10 + litellm/llms/ragflow/__init__.py | 8 + litellm/llms/ragflow/chat/__init__.py | 4 + litellm/llms/ragflow/chat/transformation.py | 264 ++++++++++++ litellm/main.py | 30 ++ ...odel_prices_and_context_window_backup.json | 360 +++++++++++++++-- litellm/types/utils.py | 1 + litellm/utils.py | 2 + .../llms/ragflow/chat/__init__.py | 4 + .../chat/test_ragflow_chat_transformation.py | 376 ++++++++++++++++++ 14 files changed, 1283 insertions(+), 23 deletions(-) create mode 100644 docs/my-website/docs/providers/ragflow.md create mode 100644 litellm/llms/ragflow/__init__.py create mode 100644 litellm/llms/ragflow/chat/__init__.py create mode 100644 litellm/llms/ragflow/chat/transformation.py create mode 100644 tests/test_litellm/llms/ragflow/chat/__init__.py create mode 100644 tests/test_litellm/llms/ragflow/chat/test_ragflow_chat_transformation.py diff --git a/docs/my-website/docs/providers/ragflow.md b/docs/my-website/docs/providers/ragflow.md new file mode 100644 index 000000000000..73223bd07b5f --- /dev/null +++ b/docs/my-website/docs/providers/ragflow.md @@ -0,0 +1,244 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# RAGFlow + +Litellm supports Ragflow's chat completions APIs + +## Supported Features + +- ✅ Chat completions +- ✅ Streaming responses +- ✅ Both chat and agent endpoints +- ✅ Multiple credential sources (params, env vars, litellm_params) +- ✅ OpenAI-compatible API format + + +## API Key + +```python +# env variable +os.environ['RAGFLOW_API_KEY'] +``` + +## API Base + +```python +# env variable +os.environ['RAGFLOW_API_BASE'] +``` + +## Overview + +RAGFlow provides OpenAI-compatible APIs with unique path structures that include chat and agent IDs: + +- **Chat endpoint**: `/api/v1/chats_openai/{chat_id}/chat/completions` +- **Agent endpoint**: `/api/v1/agents_openai/{agent_id}/chat/completions` + +The model name format embeds the endpoint type and ID: +- Chat: `ragflow/chat/{chat_id}/{model_name}` +- Agent: `ragflow/agent/{agent_id}/{model_name}` + + +## Sample Usage - Chat Endpoint + +```python +from litellm import completion +import os + +os.environ['RAGFLOW_API_KEY'] = "your-ragflow-api-key" +os.environ['RAGFLOW_API_BASE'] = "http://localhost:9380" # or your hosted URL + +response = completion( + model="ragflow/chat/my-chat-id/gpt-4o-mini", + messages=[{"role": "user", "content": "How does the deep doc understanding work?"}] +) +print(response) +``` + +## Sample Usage - Agent Endpoint + +```python +from litellm import completion +import os + +os.environ['RAGFLOW_API_KEY'] = "your-ragflow-api-key" +os.environ['RAGFLOW_API_BASE'] = "http://localhost:9380" # or your hosted URL + +response = completion( + model="ragflow/agent/my-agent-id/gpt-4o-mini", + messages=[{"role": "user", "content": "What are the key features?"}] +) +print(response) +``` + +## Sample Usage - With Parameters + +You can also pass `api_key` and `api_base` directly as parameters: + +```python +from litellm import completion + +response = completion( + model="ragflow/chat/my-chat-id/gpt-4o-mini", + messages=[{"role": "user", "content": "Hello!"}], + api_key="your-ragflow-api-key", + api_base="http://localhost:9380" +) +print(response) +``` 
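+## Sample Usage - Async
+
+Async calls go through `litellm.acompletion` and use the same model name format. This is a minimal sketch — the chat ID and model name below are placeholders for your own RAGFlow setup:
+
+```python
+import asyncio
+import os
+
+from litellm import acompletion
+
+os.environ['RAGFLOW_API_KEY'] = "your-ragflow-api-key"
+os.environ['RAGFLOW_API_BASE'] = "http://localhost:9380"
+
+async def main():
+    # Same ragflow/{chat|agent}/{id}/{model} format as the sync examples
+    response = await acompletion(
+        model="ragflow/chat/my-chat-id/gpt-4o-mini",
+        messages=[{"role": "user", "content": "Summarize the indexed documents."}],
+    )
+    print(response)
+
+asyncio.run(main())
+```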
+ +## Sample Usage - Streaming + +```python +from litellm import completion +import os + +os.environ['RAGFLOW_API_KEY'] = "your-ragflow-api-key" +os.environ['RAGFLOW_API_BASE'] = "http://localhost:9380" + +response = completion( + model="ragflow/agent/my-agent-id/gpt-4o-mini", + messages=[{"role": "user", "content": "Explain RAGFlow"}], + stream=True +) + +for chunk in response: + print(chunk) +``` + +## Model Name Format + +The model name must follow one of these formats: + +### Chat Endpoint +``` +ragflow/chat/{chat_id}/{model_name} +``` + +Example: `ragflow/chat/my-chat-id/gpt-4o-mini` + +### Agent Endpoint +``` +ragflow/agent/{agent_id}/{model_name} +``` + +Example: `ragflow/agent/my-agent-id/gpt-4o-mini` + +Where: +- `{chat_id}` or `{agent_id}` is the ID of your chat or agent in RAGFlow +- `{model_name}` is the actual model name (e.g., `gpt-4o-mini`, `gpt-4o`, etc.) + +## Configuration Sources + +LiteLLM supports multiple ways to provide credentials, checked in this order: + +1. **Function parameters**: `api_key="..."`, `api_base="..."` +2. **litellm_params**: `litellm_params={"api_key": "...", "api_base": "..."}` +3. **Environment variables**: `RAGFLOW_API_KEY`, `RAGFLOW_API_BASE` +4. **Global litellm settings**: `litellm.api_key`, `litellm.api_base` + +## Usage - LiteLLM Proxy Server + +### 1. Save key in your environment + +```bash +export RAGFLOW_API_KEY="your-ragflow-api-key" +export RAGFLOW_API_BASE="http://localhost:9380" +``` + +### 2. Start the proxy + + + + +```yaml +model_list: + - model_name: ragflow-chat-gpt4 + litellm_params: + model: ragflow/chat/my-chat-id/gpt-4o-mini + api_key: os.environ/RAGFLOW_API_KEY + api_base: os.environ/RAGFLOW_API_BASE + - model_name: ragflow-agent-gpt4 + litellm_params: + model: ragflow/agent/my-agent-id/gpt-4o-mini + api_key: os.environ/RAGFLOW_API_KEY + api_base: os.environ/RAGFLOW_API_BASE +``` + + + + +```bash +$ litellm --config /path/to/config.yaml + +# Server running on http://0.0.0.0:4000 +``` + + + + +### 3. Test it + + + + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "ragflow-chat-gpt4", + "messages": [ + {"role": "user", "content": "How does RAGFlow work?"} + ] + }' +``` + + + + +```python +from openai import OpenAI + +client = OpenAI( + api_key="sk-1234", # Your LiteLLM proxy key + base_url="http://0.0.0.0:4000" +) + +response = client.chat.completions.create( + model="ragflow-chat-gpt4", + messages=[ + {"role": "user", "content": "How does RAGFlow work?"} + ] +) +print(response) +``` + + + + +## API Base URL Handling + +The `api_base` parameter can be provided with or without `/v1` suffix. LiteLLM will automatically handle it: + +- `http://localhost:9380` → `http://localhost:9380/api/v1/chats_openai/{chat_id}/chat/completions` +- `http://localhost:9380/v1` → `http://localhost:9380/api/v1/chats_openai/{chat_id}/chat/completions` +- `http://localhost:9380/api/v1` → `http://localhost:9380/api/v1/chats_openai/{chat_id}/chat/completions` + +All three formats will work correctly. + +## Error Handling + +If you encounter errors: + +1. **Invalid model format**: Ensure your model name follows `ragflow/{chat|agent}/{id}/{model_name}` format +2. **Missing api_base**: Provide `api_base` via parameter, environment variable, or litellm_params +3. 
**Connection errors**: Verify your RAGFlow server is running and accessible at the provided `api_base` + +:::info + +For more information about passing provider-specific parameters, [go here](../completion/provider_specific_params.md) + +::: + diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index e467711b59dd..55638512ef2c 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -625,6 +625,7 @@ const sidebars = { "providers/petals", "providers/publicai", "providers/predibase", + "providers/ragflow", "providers/recraft", "providers/replicate", { diff --git a/litellm/__init__.py b/litellm/__init__.py index 007eff892c8e..595a01320994 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1387,6 +1387,7 @@ def add_known_models(): from .llms.v0.chat.transformation import V0ChatConfig from .llms.oci.chat.transformation import OCIChatConfig from .llms.morph.chat.transformation import MorphChatConfig +from .llms.ragflow.chat.transformation import RAGFlowConfig from .llms.lambda_ai.chat.transformation import LambdaAIChatConfig from .llms.hyperbolic.chat.transformation import HyperbolicChatConfig from .llms.vercel_ai_gateway.chat.transformation import VercelAIGatewayConfig diff --git a/litellm/constants.py b/litellm/constants.py index e3de7368c8ad..e252c86777cf 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -586,6 +586,7 @@ "cometapi", "clarifai", "docker_model_runner", + "ragflow", ] openai_text_completion_compatible_providers: List = ( [ # providers that support `/v1/completions` diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py index b10011befcd8..a90d16dba498 100644 --- a/litellm/litellm_core_utils/get_llm_provider_logic.py +++ b/litellm/litellm_core_utils/get_llm_provider_logic.py @@ -840,6 +840,16 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915 ) = litellm.ClarifaiConfig()._get_openai_compatible_provider_info( api_base, api_key ) + elif custom_llm_provider == "ragflow": + full_model = f"ragflow/{model}" + ( + api_base, + dynamic_api_key, + _, + ) = litellm.RAGFlowConfig()._get_openai_compatible_provider_info( + full_model, api_base, api_key, "ragflow" + ) + model = full_model if api_base is not None and not isinstance(api_base, str): raise Exception("api base needs to be a string. api_base={}".format(api_base)) diff --git a/litellm/llms/ragflow/__init__.py b/litellm/llms/ragflow/__init__.py new file mode 100644 index 000000000000..17d12bed31ce --- /dev/null +++ b/litellm/llms/ragflow/__init__.py @@ -0,0 +1,8 @@ +""" +RAGFlow provider for LiteLLM. + +RAGFlow provides OpenAI-compatible APIs with unique path structures: +- Chat endpoint: /api/v1/chats_openai/{chat_id}/chat/completions +- Agent endpoint: /api/v1/agents_openai/{agent_id}/chat/completions +""" + diff --git a/litellm/llms/ragflow/chat/__init__.py b/litellm/llms/ragflow/chat/__init__.py new file mode 100644 index 000000000000..0e0f47d07b6c --- /dev/null +++ b/litellm/llms/ragflow/chat/__init__.py @@ -0,0 +1,4 @@ +""" +RAGFlow chat completion configuration. +""" + diff --git a/litellm/llms/ragflow/chat/transformation.py b/litellm/llms/ragflow/chat/transformation.py new file mode 100644 index 000000000000..d33a1593be8a --- /dev/null +++ b/litellm/llms/ragflow/chat/transformation.py @@ -0,0 +1,264 @@ +""" +RAGFlow provider configuration for OpenAI-compatible API. 
+ +RAGFlow provides OpenAI-compatible APIs with unique path structures: +- Chat endpoint: /api/v1/chats_openai/{chat_id}/chat/completions +- Agent endpoint: /api/v1/agents_openai/{agent_id}/chat/completions + +Model name format: +- Chat: ragflow/chat/{chat_id}/{model_name} +- Agent: ragflow/agent/{agent_id}/{model_name} +""" + +from typing import Any, List, Optional, Tuple + +import litellm +from litellm.llms.openai.openai import OpenAIConfig +from litellm.secret_managers.main import get_secret, get_secret_str +from litellm.types.llms.openai import AllMessageValues + + +class RAGFlowConfig(OpenAIConfig): + """ + Configuration for RAGFlow OpenAI-compatible API. + + Handles both chat and agent endpoints by parsing the model name format: + - ragflow/chat/{chat_id}/{model_name} for chat endpoints + - ragflow/agent/{agent_id}/{model_name} for agent endpoints + """ + + def _parse_ragflow_model(self, model: str) -> Tuple[str, str, str]: + """ + Parse RAGFlow model name format: ragflow/{endpoint_type}/{id}/{model_name} + + Args: + model: Model name in format ragflow/chat/{chat_id}/{model} or ragflow/agent/{agent_id}/{model} + + Returns: + Tuple of (endpoint_type, id, model_name) + + Raises: + ValueError: If model format is invalid + """ + parts = model.split("/") + if len(parts) < 4: + raise ValueError( + f"Invalid RAGFlow model format: {model}. " + f"Expected format: ragflow/chat/{{chat_id}}/{{model}} or ragflow/agent/{{agent_id}}/{{model}}" + ) + + if parts[0] != "ragflow": + raise ValueError( + f"Invalid RAGFlow model format: {model}. Must start with 'ragflow/'" + ) + + endpoint_type = parts[1] + if endpoint_type not in ["chat", "agent"]: + raise ValueError( + f"Invalid RAGFlow endpoint type: {endpoint_type}. Must be 'chat' or 'agent'" + ) + + entity_id = parts[2] + model_name = "/".join(parts[3:]) # Handle model names that might contain slashes + + return endpoint_type, entity_id, model_name + + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + """ + Get the complete URL for the RAGFlow API call. + + Constructs URL based on endpoint type: + - Chat: /api/v1/chats_openai/{chat_id}/chat/completions + - Agent: /api/v1/agents_openai/{agent_id}/chat/completions + + Args: + api_base: Base API URL (e.g., http://ragflow-server:port or http://ragflow-server:port/v1) + api_key: API key (not used in URL construction) + model: Model name in format ragflow/{endpoint_type}/{id}/{model} + optional_params: Optional parameters + litellm_params: LiteLLM parameters (may contain api_base) + stream: Whether streaming is enabled + + Returns: + Complete URL for the API call + """ + # Get api_base from multiple sources: input param, litellm_params, environment, or global litellm setting + if litellm_params and hasattr(litellm_params, 'api_base') and litellm_params.api_base: + api_base = api_base or litellm_params.api_base + + api_base = ( + api_base + or litellm.api_base + or get_secret("RAGFLOW_API_BASE") + or get_secret_str("RAGFLOW_API_BASE") + ) + + if api_base is None: + raise ValueError("api_base is required for RAGFlow provider. 
Set it via api_base parameter, RAGFLOW_API_BASE environment variable, or litellm.api_base") + + # Parse model name to extract endpoint type and ID + endpoint_type, entity_id, _ = self._parse_ragflow_model(model) + + # Remove trailing slash from api_base if present + api_base = api_base.rstrip("/") + + # Strip /v1 or /api/v1 from api_base if present, since we'll add the full path + # Check /api/v1 first because /api/v1 ends with /v1 + if api_base.endswith("/api/v1"): + api_base = api_base[:-7] # Remove /api/v1 + elif api_base.endswith("/v1"): + api_base = api_base[:-3] # Remove /v1 + + # Construct the RAGFlow-specific path + if endpoint_type == "chat": + path = f"/api/v1/chats_openai/{entity_id}/chat/completions" + else: # agent + path = f"/api/v1/agents_openai/{entity_id}/chat/completions" + + # Ensure path starts with / + if not path.startswith("/"): + path = "/" + path + + return f"{api_base}{path}" + + def _get_openai_compatible_provider_info( + self, + model: str, + api_base: Optional[str], + api_key: Optional[str], + custom_llm_provider: str, + ) -> Tuple[Optional[str], Optional[str], str]: + """ + Get OpenAI-compatible provider information for RAGFlow. + + Args: + model: Model name (will be parsed to extract actual model name) + api_base: Base API URL (from input params) + api_key: API key (from input params) + custom_llm_provider: Custom LLM provider name + + Returns: + Tuple of (api_base, api_key, custom_llm_provider) + """ + # Parse model to extract the actual model name + # The model name will be stored in litellm_params for use in requests + _, _, actual_model = self._parse_ragflow_model(model) + + # Get api_base from multiple sources: input param, environment, or global litellm setting + dynamic_api_base = ( + api_base + or litellm.api_base + or get_secret("RAGFLOW_API_BASE") + or get_secret_str("RAGFLOW_API_BASE") + ) + + # Get api_key from multiple sources: input param, environment, or global litellm setting + dynamic_api_key = ( + api_key + or litellm.api_key + or get_secret_str("RAGFLOW_API_KEY") + ) + + return dynamic_api_base, dynamic_api_key, custom_llm_provider + + def validate_environment( + self, + headers: dict, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> dict: + """ + Validate environment and set up headers for RAGFlow API. 
+ + Args: + headers: Request headers + model: Model name + messages: Chat messages + optional_params: Optional parameters + litellm_params: LiteLLM parameters (may contain api_key) + api_key: API key (from input params) + api_base: Base API URL + + Returns: + Updated headers dictionary + """ + # Use api_key from litellm_params if available, otherwise fall back to other sources + if litellm_params and hasattr(litellm_params, 'api_key') and litellm_params.api_key: + api_key = api_key or litellm_params.api_key + + # Get api_key from multiple sources: input param, litellm_params, environment, or global litellm setting + api_key = ( + api_key + or litellm.api_key + or get_secret_str("RAGFLOW_API_KEY") + ) + + if api_key is not None: + headers["Authorization"] = f"Bearer {api_key}" + + # Ensure Content-Type is set to application/json + if "content-type" not in headers and "Content-Type" not in headers: + headers["Content-Type"] = "application/json" + + # Parse model to extract actual model name and store it + # The actual model name should be used in the request body + try: + _, _, actual_model = self._parse_ragflow_model(model) + # Store the actual model name in litellm_params for use in transform_request + litellm_params["_ragflow_actual_model"] = actual_model + except ValueError: + # If parsing fails, use the original model name + pass + + return headers + + def transform_request( + self, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + headers: dict, + ) -> dict: + """ + Transform request for RAGFlow API. + + Uses the actual model name extracted from the RAGFlow model format. + + Args: + model: Model name in RAGFlow format + messages: Chat messages + optional_params: Optional parameters + litellm_params: LiteLLM parameters (may contain _ragflow_actual_model) + headers: Request headers + + Returns: + Transformed request dictionary + """ + # Get the actual model name from litellm_params if available + actual_model = litellm_params.get("_ragflow_actual_model") + if actual_model is None: + # Fallback: try to parse the model name + try: + _, _, actual_model = self._parse_ragflow_model(model) + except ValueError: + # If parsing fails, use the original model name + actual_model = model + + # Use parent's transform_request with the actual model name + return super().transform_request( + actual_model, messages, optional_params, litellm_params, headers + ) + diff --git a/litellm/main.py b/litellm/main.py index a09a94530177..fba2fc9b9a6e 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1989,6 +1989,36 @@ def completion( # type: ignore # noqa: PLR0915 ) raise e + elif custom_llm_provider == "ragflow": + ## COMPLETION CALL - RAGFlow uses HTTP handler to support custom URL paths + try: + response = base_llm_http_handler.completion( + model=model, + messages=messages, + headers=headers, + model_response=model_response, + api_key=api_key, + api_base=api_base, + acompletion=acompletion, + logging_obj=logging, + optional_params=optional_params, + litellm_params=litellm_params, + shared_session=shared_session, + timeout=timeout, + client=client, + custom_llm_provider=custom_llm_provider, + encoding=encoding, + stream=stream, + provider_config=provider_config, + ) + except Exception as e: + logging.post_call( + input=messages, + api_key=api_key, + original_response=str(e), + additional_args={"headers": headers}, + ) + raise e elif custom_llm_provider == "xai": ## COMPLETION CALL try: diff --git a/litellm/model_prices_and_context_window_backup.json 
b/litellm/model_prices_and_context_window_backup.json index 9fdc1704f410..f28e9b1290ff 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -6717,6 +6717,33 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "claude-opus-4-5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "claude-sonnet-4-20250514": { "deprecation_date": "2026-05-14", "cache_creation_input_token_cost": 3.75e-06, @@ -7824,26 +7851,298 @@ "source": "https://www.databricks.com/product/pricing/foundation-model-serving" }, "databricks/databricks-claude-3-7-sonnet": { - "input_cost_per_token": 2.5e-06, - "input_dbu_cost_per_token": 3.571e-05, + "input_cost_per_token": 2.9999900000000002e-06, + "input_dbu_cost_per_token": 4.2857e-05, "litellm_provider": "databricks", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 200000, "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 1.7857e-05, - "output_db_cost_per_token": 0.000214286, - "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-haiku-4-5": { + "input_cost_per_token": 1.00002e-06, + "input_dbu_cost_per_token": 1.4286e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 5.00003e-06, + "output_dbu_cost_per_token": 7.1429e-05, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4": { + "input_cost_per_token": 1.5000020000000002e-05, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 7.500003000000001e-05, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-1": { + "input_cost_per_token": 1.5000020000000002e-05, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 7.500003000000001e-05, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-5": { + "input_cost_per_token": 5.00003e-06, + "input_dbu_cost_per_token": 7.1429e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 2.5000010000000002e-05, + "output_dbu_cost_per_token": 0.000357143, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4": { + "input_cost_per_token": 2.9999900000000002e-06, + "input_dbu_cost_per_token": 4.2857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-1": { + "input_cost_per_token": 2.9999900000000002e-06, + "input_dbu_cost_per_token": 4.2857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-5": { + "input_cost_per_token": 2.9999900000000002e-06, + "input_dbu_cost_per_token": 4.2857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", "supports_assistant_prefill": true, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true }, + "databricks/databricks-gemini-2-5-flash": { + "input_cost_per_token": 3.0001999999999996e-07, + "input_dbu_cost_per_token": 4.285999999999999e-06, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 1048576, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 2.49998e-06, + "output_dbu_cost_per_token": 3.5714e-05, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-pro": { + "input_cost_per_token": 1.24999e-06, + "input_dbu_cost_per_token": 1.7857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 1048576, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-06, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemma-3-12b": { + "input_cost_per_token": 1.5000999999999998e-07, + "input_dbu_cost_per_token": 2.1429999999999996e-06, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 5.0001e-07, + "output_dbu_cost_per_token": 7.143e-06, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-5": { + "input_cost_per_token": 1.24999e-06, + "input_dbu_cost_per_token": 1.7857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-06, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-1": { + "input_cost_per_token": 1.24999e-06, + "input_dbu_cost_per_token": 1.7857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-06, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-mini": { + "input_cost_per_token": 2.4997000000000006e-07, + "input_dbu_cost_per_token": 3.571e-06, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.9999700000000004e-06, + "output_dbu_cost_per_token": 2.8571e-05, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-nano": { + "input_cost_per_token": 4.998e-08, + "input_dbu_cost_per_token": 7.14e-07, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 3.9998000000000007e-07, + "output_dbu_cost_per_token": 5.714000000000001e-06, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-oss-120b": { + "input_cost_per_token": 1.5000999999999998e-07, + "input_dbu_cost_per_token": 2.1429999999999996e-06, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 5.9997e-07, + "output_dbu_cost_per_token": 8.571e-06, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-oss-20b": { + "input_cost_per_token": 7e-08, + "input_dbu_cost_per_token": 1e-06, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 3.0001999999999996e-07, + "output_dbu_cost_per_token": 4.285999999999999e-06, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, "databricks/databricks-gte-large-en": { - "input_cost_per_token": 1.2999e-07, + "input_cost_per_token": 1.2999000000000001e-07, "input_dbu_cost_per_token": 1.857e-06, "litellm_provider": "databricks", "max_input_tokens": 8192, @@ -7868,14 +8167,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 1.5e-06, + "output_cost_per_token": 1.5000300000000002e-06, "output_dbu_cost_per_token": 2.1429e-05, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-llama-4-maverick": { - "input_cost_per_token": 5e-06, - "input_dbu_cost_per_token": 7.143e-05, + "input_cost_per_token": 5.0001e-07, + "input_dbu_cost_per_token": 7.143e-06, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7884,13 +8183,13 @@ "notes": "Databricks documentation now provides both DBU costs (_dbu_cost_per_token) and dollar costs(_cost_per_token)." }, "mode": "chat", - "output_cost_per_token": 1.5e-05, - "output_dbu_cost_per_token": 0.00021429, + "output_cost_per_token": 1.5000300000000002e-06, + "output_dbu_cost_per_token": 2.1429e-05, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-meta-llama-3-1-405b-instruct": { - "input_cost_per_token": 5e-06, + "input_cost_per_token": 5.00003e-06, "input_dbu_cost_per_token": 7.1429e-05, "litellm_provider": "databricks", "max_input_tokens": 128000, @@ -7900,14 +8199,29 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 1.500002e-05, - "output_db_cost_per_token": 0.000214286, + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, + "databricks/databricks-meta-llama-3-1-8b-instruct": { + "input_cost_per_token": 1.5000999999999998e-07, + "input_dbu_cost_per_token": 2.1429999999999996e-06, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 4.5003000000000007e-07, + "output_dbu_cost_per_token": 6.429000000000001e-06, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, "databricks/databricks-meta-llama-3-3-70b-instruct": { - "input_cost_per_token": 1.00002e-06, - "input_dbu_cost_per_token": 1.4286e-05, + "input_cost_per_token": 5.0001e-07, + "input_dbu_cost_per_token": 7.143e-06, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7916,8 +8230,8 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 2.99999e-06, - "output_dbu_cost_per_token": 4.2857e-05, + "output_cost_per_token": 1.5000300000000002e-06, + "output_dbu_cost_per_token": 2.1429e-05, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, @@ -7932,7 +8246,7 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 2.99999e-06, + "output_cost_per_token": 2.9999900000000002e-06, "output_dbu_cost_per_token": 4.2857e-05, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true @@ -7948,13 +8262,13 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 9.9902e-07, + "output_cost_per_token": 1.00002e-06, "output_dbu_cost_per_token": 1.4286e-05, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-mpt-30b-instruct": { - "input_cost_per_token": 9.9902e-07, + "input_cost_per_token": 1.00002e-06, "input_dbu_cost_per_token": 1.4286e-05, "litellm_provider": "databricks", "max_input_tokens": 8192, @@ -7964,7 +8278,7 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 9.9902e-07, + "output_cost_per_token": 1.00002e-06, "output_dbu_cost_per_token": 1.4286e-05, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 58267fdfea99..942192e2edf0 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -2618,6 +2618,7 @@ class LlmProviders(str, Enum): DATABRICKS = "databricks" EMPOWER = "empower" GITHUB = "github" + RAGFLOW = "ragflow" COMPACTIFAI = "compactifai" DOCKER_MODEL_RUNNER = "docker_model_runner" CUSTOM = "custom" diff --git a/litellm/utils.py b/litellm/utils.py index 6c50afc5f49e..eb671ce2b993 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -7110,6 +7110,8 @@ def get_provider_chat_config( # noqa: PLR0915 return litellm.CompactifAIChatConfig() elif litellm.LlmProviders.GITHUB_COPILOT == provider: return litellm.GithubCopilotConfig() + elif litellm.LlmProviders.RAGFLOW == provider: + return litellm.RAGFlowConfig() elif ( litellm.LlmProviders.CUSTOM == provider or litellm.LlmProviders.CUSTOM_OPENAI == provider diff --git a/tests/test_litellm/llms/ragflow/chat/__init__.py b/tests/test_litellm/llms/ragflow/chat/__init__.py new file mode 100644 index 000000000000..4e074b841506 --- /dev/null +++ b/tests/test_litellm/llms/ragflow/chat/__init__.py @@ -0,0 +1,4 @@ +""" +RAGFlow chat transformation tests. +""" + diff --git a/tests/test_litellm/llms/ragflow/chat/test_ragflow_chat_transformation.py b/tests/test_litellm/llms/ragflow/chat/test_ragflow_chat_transformation.py new file mode 100644 index 000000000000..90f2504f94c8 --- /dev/null +++ b/tests/test_litellm/llms/ragflow/chat/test_ragflow_chat_transformation.py @@ -0,0 +1,376 @@ +""" +Test file for RAGFlow chat transformation functionality. + +Tests the model name parsing, URL construction, and request transformation +for RAGFlow's OpenAI-compatible API with custom path structures. 
+""" + +import os +import sys +from unittest.mock import Mock, patch + +import pytest + +# Add the project root to Python path +sys.path.insert(0, os.path.abspath("../../../../..")) + +import litellm +from litellm.llms.ragflow.chat.transformation import RAGFlowConfig +from litellm.types.llms.openai import AllMessageValues + + +class TestRAGFlowChatTransformation: + """Test suite for RAGFlow chat transformation functionality.""" + + def test_parse_ragflow_model_chat(self): + """Test parsing of chat model format.""" + config = RAGFlowConfig() + + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + endpoint_type, entity_id, model_name = config._parse_ragflow_model(model) + + assert endpoint_type == "chat" + assert entity_id == "my-chat-id" + assert model_name == "gpt-4o-mini" + + def test_parse_ragflow_model_agent(self): + """Test parsing of agent model format.""" + config = RAGFlowConfig() + + model = "ragflow/agent/my-agent-id/gpt-4o-mini" + endpoint_type, entity_id, model_name = config._parse_ragflow_model(model) + + assert endpoint_type == "agent" + assert entity_id == "my-agent-id" + assert model_name == "gpt-4o-mini" + + def test_parse_ragflow_model_with_slashes_in_model_name(self): + """Test parsing when model name contains slashes.""" + config = RAGFlowConfig() + + model = "ragflow/chat/my-chat-id/openai/gpt-4o-mini" + endpoint_type, entity_id, model_name = config._parse_ragflow_model(model) + + assert endpoint_type == "chat" + assert entity_id == "my-chat-id" + assert model_name == "openai/gpt-4o-mini" + + def test_parse_ragflow_model_invalid_format(self): + """Test parsing with invalid model format.""" + config = RAGFlowConfig() + + with pytest.raises(ValueError, match="Invalid RAGFlow model format"): + config._parse_ragflow_model("ragflow/chat/model-name") + + with pytest.raises(ValueError, match="Invalid RAGFlow model format"): + config._parse_ragflow_model("invalid/chat/id/model") + + with pytest.raises(ValueError, match="Must start with 'ragflow/'"): + config._parse_ragflow_model("not-ragflow/chat/id/model") + + def test_parse_ragflow_model_invalid_endpoint_type(self): + """Test parsing with invalid endpoint type.""" + config = RAGFlowConfig() + + with pytest.raises(ValueError, match="Invalid RAGFlow endpoint type"): + config._parse_ragflow_model("ragflow/invalid/my-id/model") + + def test_get_complete_url_chat(self): + """Test URL construction for chat endpoint.""" + config = RAGFlowConfig() + + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + api_base = "http://localhost:9380" + + url = config.get_complete_url( + api_base=api_base, + api_key=None, + model=model, + optional_params={}, + litellm_params={}, + stream=False, + ) + + assert url == "http://localhost:9380/api/v1/chats_openai/my-chat-id/chat/completions" + + def test_get_complete_url_agent(self): + """Test URL construction for agent endpoint.""" + config = RAGFlowConfig() + + model = "ragflow/agent/my-agent-id/gpt-4o-mini" + api_base = "http://localhost:9380" + + url = config.get_complete_url( + api_base=api_base, + api_key=None, + model=model, + optional_params={}, + litellm_params={}, + stream=False, + ) + + assert url == "http://localhost:9380/api/v1/agents_openai/my-agent-id/chat/completions" + + def test_get_complete_url_strips_v1(self): + """Test URL construction when api_base ends with /v1.""" + config = RAGFlowConfig() + + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + api_base = "http://localhost:9380/v1" + + url = config.get_complete_url( + api_base=api_base, + api_key=None, + model=model, + optional_params={}, + 
litellm_params={}, + stream=False, + ) + + assert url == "http://localhost:9380/api/v1/chats_openai/my-chat-id/chat/completions" + + def test_get_complete_url_strips_api_v1(self): + """Test URL construction when api_base ends with /api/v1.""" + config = RAGFlowConfig() + + model = "ragflow/agent/my-agent-id/gpt-4o-mini" + api_base = "http://localhost:9380/api/v1" + + url = config.get_complete_url( + api_base=api_base, + api_key=None, + model=model, + optional_params={}, + litellm_params={}, + stream=False, + ) + + assert url == "http://localhost:9380/api/v1/agents_openai/my-agent-id/chat/completions" + + def test_get_complete_url_from_litellm_params(self): + """Test URL construction with api_base from litellm_params.""" + config = RAGFlowConfig() + + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + # Create a simple dict-like object for litellm_params + class LiteLLMParams: + def __init__(self): + self.api_base = "http://ragflow-server:9380" + + litellm_params = LiteLLMParams() + + url = config.get_complete_url( + api_base=None, + api_key=None, + model=model, + optional_params={}, + litellm_params=litellm_params, + stream=False, + ) + + assert url == "http://ragflow-server:9380/api/v1/chats_openai/my-chat-id/chat/completions" + + def test_get_complete_url_missing_api_base(self): + """Test URL construction when api_base is missing.""" + config = RAGFlowConfig() + + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + + with pytest.raises(ValueError, match="api_base is required"): + config.get_complete_url( + api_base=None, + api_key=None, + model=model, + optional_params={}, + litellm_params={}, + stream=False, + ) + + @patch.dict(os.environ, {"RAGFLOW_API_BASE": "http://env-ragflow:9380"}) + def test_get_complete_url_from_environment(self): + """Test URL construction with api_base from environment variable.""" + config = RAGFlowConfig() + + model = "ragflow/agent/my-agent-id/gpt-4o-mini" + + url = config.get_complete_url( + api_base=None, + api_key=None, + model=model, + optional_params={}, + litellm_params={}, + stream=False, + ) + + assert url == "http://env-ragflow:9380/api/v1/agents_openai/my-agent-id/chat/completions" + + def test_validate_environment_sets_headers(self): + """Test that validate_environment sets proper headers.""" + config = RAGFlowConfig() + + headers = {} + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + messages = [{"role": "user", "content": "Hello"}] + api_key = "test-api-key" + + result_headers = config.validate_environment( + headers=headers, + model=model, + messages=messages, + optional_params={}, + litellm_params={}, + api_key=api_key, + api_base="http://localhost:9380", + ) + + assert result_headers["Authorization"] == "Bearer test-api-key" + assert result_headers["Content-Type"] == "application/json" + + def test_validate_environment_stores_actual_model(self): + """Test that validate_environment stores actual model name.""" + config = RAGFlowConfig() + + headers = {} + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + messages = [{"role": "user", "content": "Hello"}] + litellm_params = {} + + config.validate_environment( + headers=headers, + model=model, + messages=messages, + optional_params={}, + litellm_params=litellm_params, + api_key="test-key", + api_base="http://localhost:9380", + ) + + assert litellm_params["_ragflow_actual_model"] == "gpt-4o-mini" + + @patch.dict(os.environ, {"RAGFLOW_API_KEY": "env-api-key"}) + def test_validate_environment_from_environment(self): + """Test that validate_environment gets api_key from environment.""" + config = 
RAGFlowConfig() + + headers = {} + model = "ragflow/agent/my-agent-id/gpt-4o-mini" + messages = [{"role": "user", "content": "Hello"}] + + result_headers = config.validate_environment( + headers=headers, + model=model, + messages=messages, + optional_params={}, + litellm_params={}, + api_key=None, + api_base="http://localhost:9380", + ) + + assert result_headers["Authorization"] == "Bearer env-api-key" + + def test_validate_environment_from_litellm_params(self): + """Test that validate_environment gets api_key from litellm_params.""" + config = RAGFlowConfig() + + headers = {} + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + messages = [{"role": "user", "content": "Hello"}] + # Create a simple object for litellm_params with api_key attribute + class LiteLLMParams: + def __init__(self): + self.api_key = "litellm-params-key" + def __setitem__(self, key, value): + setattr(self, key, value) + + litellm_params = LiteLLMParams() + + result_headers = config.validate_environment( + headers=headers, + model=model, + messages=messages, + optional_params={}, + litellm_params=litellm_params, + api_key=None, + api_base="http://localhost:9380", + ) + + assert result_headers["Authorization"] == "Bearer litellm-params-key" + + def test_transform_request_uses_actual_model(self): + """Test that transform_request uses the actual model name.""" + config = RAGFlowConfig() + + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + messages = [{"role": "user", "content": "Hello"}] + litellm_params = {"_ragflow_actual_model": "gpt-4o-mini"} + + # Test the actual behavior by checking the model in the result + result = config.transform_request( + model=model, + messages=messages, + optional_params={}, + litellm_params=litellm_params, + headers={}, + ) + + # The result should contain the actual model name, not the full ragflow path + assert result["model"] == "gpt-4o-mini" + assert result["messages"] == messages + + def test_transform_request_fallback_parsing(self): + """Test that transform_request falls back to parsing if _ragflow_actual_model is missing.""" + config = RAGFlowConfig() + + model = "ragflow/agent/my-agent-id/gpt-4o-mini" + messages = [{"role": "user", "content": "Hello"}] + litellm_params = {} # Missing _ragflow_actual_model + + result = config.transform_request( + model=model, + messages=messages, + optional_params={}, + litellm_params=litellm_params, + headers={}, + ) + + # Should parse and use the actual model name + assert result["model"] == "gpt-4o-mini" + assert result["messages"] == messages + + def test_get_openai_compatible_provider_info(self): + """Test _get_openai_compatible_provider_info returns correct values.""" + config = RAGFlowConfig() + + model = "ragflow/chat/my-chat-id/gpt-4o-mini" + api_base = "http://localhost:9380" + api_key = "test-key" + + result_api_base, result_api_key, result_provider = config._get_openai_compatible_provider_info( + model=model, + api_base=api_base, + api_key=api_key, + custom_llm_provider="ragflow", + ) + + assert result_api_base == api_base + assert result_api_key == api_key + assert result_provider == "ragflow" + + @patch.dict(os.environ, {"RAGFLOW_API_BASE": "http://env-base:9380", "RAGFLOW_API_KEY": "env-key"}) + def test_get_openai_compatible_provider_info_from_env(self): + """Test _get_openai_compatible_provider_info gets values from environment.""" + config = RAGFlowConfig() + + model = "ragflow/agent/my-agent-id/gpt-4o-mini" + + result_api_base, result_api_key, result_provider = config._get_openai_compatible_provider_info( + model=model, + 
api_base=None, + api_key=None, + custom_llm_provider="ragflow", + ) + + assert result_api_base == "http://env-base:9380" + assert result_api_key == "env-key" + assert result_provider == "ragflow" +