diff --git a/.gitignore b/.gitignore index 002274a..840090d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ service_account.json # Since we are running it as a library, better not to commit the lock file uv.lock .secrets +openapi.json diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f49ae5a..dc24749 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,16 +25,16 @@ Ensure all tests pass: `pytest -v` ## Local Build for QA and manual testing -1. Use `litellm_docker_compose.yaml` to start LiteLLM and Postgres locally: +1. Use `anyllm_docker_compose.yaml` to start Any-LLM-Gateway and Postgres locally: ```bash - docker compose -f litellm_docker_compose.yaml up -d + docker compose -f anyllm_docker_compose.yaml up -d ``` or if you are using legacy docker-compose: ```bash - docker-compose -f litellm_docker_compose.yaml up -d + docker-compose -f anyllm_docker_compose.yaml up -d ``` 2. Create a second database that is needed for authentication @@ -43,7 +43,7 @@ or if you are using legacy docker-compose: bash scripts/create-app-attest-database.sh ``` -LiteLLM will be accessible at `localhost:4000` and `localhost:4000/ui`. +Any-LLM-Gateway will be accessible at `localhost:4000` and `localhost:4000/ui`. 3. Run MLPA with @@ -54,7 +54,7 @@ LiteLLM will be accessible at `localhost:4000` and `localhost:4000/ui`. 4. Stop the service with ```bash -docker compose -f litellm_docker_compose.yaml down +docker compose -f anyllm_docker_compose.yaml down ``` ### Useful CURLs for QA @@ -79,7 +79,7 @@ curl --location 'http://0.0.0.0:8080/health/readiness' \ curl --location 'http://0.0.0.0:8080/v1/chat/completions' \ --header 'Content-Type: application/json' \ --header 'x-fxa-authorization: Bearer {YOUR_MOZILLA_FXA_TOKEN}' \ - --header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \ + --header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \ --data '{ "model": "openai/gpt-4o", "messages": [{ @@ -89,7 +89,7 @@ curl --location 'http://0.0.0.0:8080/health/readiness' \ }' ``` -1. LiteLLM liveness: +1. Any-LLM-Gateway liveness: ```bash curl --location 'http://localhost:4000/health/liveness' \ @@ -101,16 +101,16 @@ curl --location 'http://localhost:4000/health/liveness' \ ```bash curl --location 'http://localhost:4000/models' \ --header 'Content-Type: application/json' \ ---header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \ +--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \ --data '' ``` -1. Completion directly from LiteLLM: +1. Completion directly from Any-LLM-Gateway: ```bash curl --location 'http://localhost:4000/v1/chat/completions' \ --header 'Content-Type: application/json' \ ---header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \ +--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \ --data '{ "model": "openai/gpt-4o", "messages": [ diff --git a/README.md b/README.md index 6b87638..f0fad97 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Mozilla LLM Proxy Auth (MLPA) -A proxy to verify App Attest/FxA payloads and proxy requests through LiteLLM to enact budgets and per user management. +A proxy to verify App Attest/FxA payloads and proxy requests through any-llm-gateway to enact budgets and per user management. 
## Setup @@ -12,9 +12,13 @@ This creates a virtual environment in `.venv/`, installs dependencies, and insta ## Running MLPA locally with Docker -### Run LiteLLM +### Run Any-LLM-Gateway -`docker compose -f litellm_docker_compose.yaml up -d` +The any-llm-gateway image requires authentication to pull: see [github docs](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry#authenticating-with-a-personal-access-token-classic) for help with creating a PAT and authenticating docker to the registry. +```bash +echo $GITHUB_PAT | docker login ghcr.io -u USERNAME --password-stdin # The command to authenticate docker with ghcr +docker compose -f anyllm_docker_compose.yaml up -d +``` ### Run MLPA @@ -30,14 +34,14 @@ pip install --no-cache-dir -e . mlpa ``` -## Config (see [LiteLLM Documentation](https://docs.litellm.ai/docs/simple_proxy_old_doc) for more config options) +## Config `.env` (see `config.py` for all configuration variables) ``` MASTER_KEY="sk-1234..." -LITELLM_API_BASE="http://mlpa:4000" -DATABASE_URL=postgresql://... # required for direct user editing in SQL +GATEWAY_API_BASE="http://any-llm-gateway:8000" +DATABASE_URL=postgresql://gateway:gateway@postgres:5432 CHALLENGE_EXPIRY_SECONDS=300 PORT=8080 @@ -47,12 +51,14 @@ APP_DEVELOPMENT_TEAM="12BC943KDC" CLIENT_ID="..." CLIENT_SECRET="..." -MODEL_NAME="" +MODEL_NAME="vertexai:model-name" # Use provider:model format TEMPERATURE=0.1 TOP_P=0.01 ``` -### Also See `litellm_config.yaml` for litellm config +### Gateway Configuration + +See `gateway_config.yaml` for any-llm-gateway configuration. Service account configured to hit VertexAI: `service_account.json` should be in directory root diff --git a/anyllm_docker_compose.yaml b/anyllm_docker_compose.yaml new file mode 100644 index 0000000..b961936 --- /dev/null +++ b/anyllm_docker_compose.yaml @@ -0,0 +1,47 @@ +services: + postgres: + image: postgres:16-alpine + container_name: anyllm_postgres + restart: always + environment: + POSTGRES_USER: gateway + POSTGRES_PASSWORD: gateway + POSTGRES_DB: gateway + ports: + - "5432:5432" + volumes: + - pg_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U gateway -d gateway"] + interval: 5s + timeout: 5s + retries: 5 + networks: + - gateway-network + + any-llm-gateway: + image: ghcr.io/mozilla-ai/any-llm-gateway:main + container_name: any_llm_gateway + platform: linux/amd64 + depends_on: + postgres: + condition: service_healthy + ports: + - "8000:8000" + volumes: + - ./gateway_config.yaml:/app/config.yaml + - ./service_account.json:/app/service_account.json + environment: + - DATABASE_URL=postgresql://gateway:gateway@postgres:5432/gateway + - GATEWAY_MASTER_KEY=${MASTER_KEY} + restart: unless-stopped + networks: + - gateway-network + +volumes: + pg_data: + +networks: + gateway-network: + name: mlpa-network + driver: bridge diff --git a/docs/index.html b/docs/index.html index 40af97a..2871919 100644 --- a/docs/index.html +++ b/docs/index.html @@ -3,7 +3,7 @@ - MLPA + any-llm Gateway - -

Removed page (previous generated Redoc docs for MLPA):

MLPA (1.0.0)

A proxy to verify App Attest/FxA payloads and proxy requests through LiteLLM.

Health — health check endpoints: Liveness Probe, Readiness Probe.

Metrics — Prometheus metrics endpoints: Get Metrics.

App Attest — endpoints for verifying App Attest payloads:

Get Challenge
query Parameters: key_id (required, string)

Attest
Request Body schema: application/json — key_id (required, string), challenge_b64 (required, string), attestation_obj_b64 (required, string)

LiteLLM — endpoints for interacting with LiteLLM:

Chat Completion
Authorize first using App Attest or FxA. Either pass the x-fxa-authorization header or include the {key_id, challenge_b64, assertion_obj_b64} fields in the request body for App Attest authorization. payload is always required and contains the prompt.
header Parameters: X-Fxa-Authorization (string or null)
Request Body schema: application/json — stream (default false), messages (default []), model (default "vertex_ai/mistral-small-2503"), temperature (default 0.1), max_completion_tokens (default 1024), top_p (default 0.01), plus the required key_id, challenge_b64, and assertion_obj_b64 strings

User

User Info
path Parameters: user_id (required, string)

Added page (regenerated from the gateway's OpenAPI spec via scripts/create-docs.sh):

any-llm Gateway (0.1.0)

A clean FastAPI gateway for any-llm with API key management

chat

Chat Completions

OpenAI-compatible chat completions endpoint.

Supports both streaming and non-streaming responses. Handles reasoning content from any-llm providers.

Authentication modes (see the curl sketch at the end of this section):

  • Master key + user field: Use specified user (must exist)
  • API key + user field: Use specified user (must exist)
  • API key without user field: Use virtual user created with API key
Request Body schema: application/json
required
model
required
string (Model)
required
Array of objects (Messages)
User (string) or User (null) (User)
Temperature (number) or Temperature (null) (Temperature)
Max Tokens (integer) or Max Tokens (null) (Max Tokens)
Top P (number) or Top P (null) (Top P)
stream
boolean (Stream)
Default: false
Array of Tools (objects) or Tools (null) (Tools)
Tool Choice (string) or Tool Choice (object) or Tool Choice (null) (Tool Choice)
Response Format (object) or Response Format (null) (Response Format)

Responses

Request samples

Content type
application/json
{
  • "model": "string",
  • "messages": [
    ],
  • "user": "string",
  • "temperature": 0,
  • "max_tokens": 0,
  • "top_p": 0,
  • "stream": false,
  • "tools": [
    ],
  • "tool_choice": "string",
  • "response_format": { }
}

Response samples

Content type
application/json
null
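As an illustration of the authentication modes above, a master-key request might look like the following sketch. It assumes the setup from this PR: the gateway mapped to localhost:8000 by anyllm_docker_compose.yaml, the X-AnyLLM-Key header as defined in MLPA's GATEWAY_HEADERS, and a placeholder provider:model key in the README's MODEL_NAME format.

```bash
curl --location 'http://localhost:8000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data '{
    "model": "vertexai:model-name",
    "messages": [{"role": "user", "content": "Hello"}],
    "user": "test-user-1"
}'
```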

keys

Create Key

Create a new API key.

Requires master key authentication.

If user_id is provided, the key will be associated with that user (creates user if it doesn't exist). If user_id is not provided, a new user will be created automatically and the key will be associated with it.

Request Body schema: application/json
required
Key Name (string) or Key Name (null) (Key Name)
Optional name for the key
User Id (string) or User Id (null) (User Id)
Optional user ID to associate with this key
Expires At (string) or Expires At (null) (Expires At)
Optional expiration timestamp
object (Metadata)
Optional metadata

Responses

Request samples

Content type
application/json
{
  • "key_name": "string",
  • "user_id": "string",
  • "expires_at": "2019-08-24T14:15:22Z",
  • "metadata": { }
}

Response samples

Content type
application/json
{
  • "id": "string",
  • "key": "string",
  • "key_name": "string",
  • "user_id": "string",
  • "created_at": "string",
  • "expires_at": "string",
  • "is_active": true,
  • "metadata": { }
}
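A minimal key-creation call might look like this sketch; the /v1/keys path is an assumption based on the section grouping (paths are not shown in this page), while the master-key header follows the convention used elsewhere in this PR:

```bash
curl --location 'http://localhost:8000/v1/keys' \
--header 'Content-Type: application/json' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data '{
    "key_name": "qa-key",
    "user_id": "test-user-1"
}'
```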

List Keys

List all API keys.

Requires master key authentication.
query Parameters
skip
integer (Skip)
Default: 0
limit
integer (Limit)
Default: 100

Responses

Response samples

Content type
application/json
[
  • {
    }
]
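Listing supports the skip/limit pagination shown above; a sketch, again assuming the /v1/keys path:

```bash
curl --location 'http://localhost:8000/v1/keys?skip=0&limit=100' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}'
```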

Get Key

Get details of a specific API key.

Requires master key authentication.
path Parameters
key_id
required
string (Key Id)

Responses

Response samples

Content type
application/json
{
  • "id": "string",
  • "key_name": "string",
  • "user_id": "string",
  • "created_at": "string",
  • "last_used_at": "string",
  • "expires_at": "string",
  • "is_active": true,
  • "metadata": { }
}

Update Key

Update an API key.

Requires master key authentication.
path Parameters
key_id
required
string (Key Id)
Request Body schema: application/json
required
Key Name (string) or Key Name (null) (Key Name)
Is Active (boolean) or Is Active (null) (Is Active)
Expires At (string) or Expires At (null) (Expires At)
Metadata (object) or Metadata (null) (Metadata)

Responses

Request samples

Content type
application/json
{
  • "key_name": "string",
  • "is_active": true,
  • "expires_at": "2019-08-24T14:15:22Z",
  • "metadata": { }
}

Response samples

Content type
application/json
{
  • "id": "string",
  • "key_name": "string",
  • "user_id": "string",
  • "created_at": "string",
  • "last_used_at": "string",
  • "expires_at": "string",
  • "is_active": true,
  • "metadata": { }
}

Delete Key

Delete (revoke) an API key.

Requires master key authentication.
path Parameters
key_id
required
string (Key Id)

Responses

Response samples

Content type
application/json
{
  • "detail": [
    ]
}

users

Create User

Create a new user.

Request Body schema: application/json
required
user_id
required
string (User Id)
Unique user identifier
Alias (string) or Alias (null) (Alias)
Optional admin-facing alias
Budget Id (string) or Budget Id (null) (Budget Id)
Optional budget ID
blocked
boolean (Blocked)
Default: false
Whether user is blocked
object (Metadata)
Optional metadata

Responses

Request samples

Content type
application/json
{
  • "user_id": "string",
  • "alias": "string",
  • "budget_id": "string",
  • "blocked": false,
  • "metadata": { }
}

Response samples

Content type
application/json
{
  • "user_id": "string",
  • "alias": "string",
  • "spend": 0,
  • "budget_id": "string",
  • "blocked": true,
  • "created_at": "string",
  • "updated_at": "string",
  • "metadata": { }
}
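The bulk-user script added in this PR (src/proxy/test.py) exercises this endpoint with the same payload shape; a single-user equivalent with curl:

```bash
curl --location 'http://localhost:8000/v1/users' \
--header 'Content-Type: application/json' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data '{
    "user_id": "test-user-1",
    "alias": "Test User 1",
    "blocked": false,
    "metadata": {"created_by": "qa"}
}'
```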

List Users

List all users with pagination.
query Parameters
skip
integer (Skip)
Default: 0
limit
integer (Limit)
Default: 100

Responses

Response samples

Content type
application/json
[
  • {
    }
]

Get User

Get details of a specific user.
path Parameters
user_id
required
string (User Id)

Responses

Response samples

Content type
application/json
{
  • "user_id": "string",
  • "alias": "string",
  • "spend": 0,
  • "budget_id": "string",
  • "blocked": true,
  • "created_at": "string",
  • "updated_at": "string",
  • "metadata": { }
}

Update User

Update a user.
path Parameters
user_id
required
string (User Id)
Request Body schema: application/json
required
Alias (string) or Alias (null) (Alias)
Budget Id (string) or Budget Id (null) (Budget Id)
Blocked (boolean) or Blocked (null) (Blocked)
Metadata (object) or Metadata (null) (Metadata)

Responses

Request samples

Content type
application/json
{
  • "alias": "string",
  • "budget_id": "string",
  • "blocked": true,
  • "metadata": { }
}

Response samples

Content type
application/json
{
  • "user_id": "string",
  • "alias": "string",
  • "spend": 0,
  • "budget_id": "string",
  • "blocked": true,
  • "created_at": "string",
  • "updated_at": "string",
  • "metadata": { }
}
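MLPA's GatewayPGService.update_user (added in this PR) drives this endpoint with a PATCH against GATEWAY_USERS_URL; the direct curl equivalent, using the example fields from its docstring:

```bash
curl --request PATCH 'http://localhost:8000/v1/users/test-user-32' \
--header 'Content-Type: application/json' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data '{
    "blocked": false,
    "budget_id": null,
    "alias": null
}'
```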

Delete User

Delete a user.
path Parameters
user_id
required
string (User Id)

Responses

Response samples

Content type
application/json
{
  • "detail": [
    ]
}

Get User Usage

Get usage history for a specific user.
path Parameters
user_id
required
string (User Id)
query Parameters
skip
integer (Skip)
Default: 0
limit
integer (Limit)
Default: 100

Responses

Response samples

Content type
application/json
[
  • {
    }
]
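Usage history takes the same skip/limit query parameters as the list endpoints; a sketch (the /v1/users/{user_id}/usage path is an assumption based on the section grouping):

```bash
curl --location 'http://localhost:8000/v1/users/test-user-1/usage?skip=0&limit=100' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}'
```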

budgets

Create Budget

Create a new budget.

Request Body schema: application/json
required
Max Budget (number) or Max Budget (null) (Max Budget)
Maximum spending limit
Budget Duration (string) or Budget Duration (null) (Budget Duration)
Budget duration (e.g., 'monthly', 'daily')
Budget Reset At (string) or Budget Reset At (null) (Budget Reset At)
When budget resets

Responses

Request samples

Content type
application/json
{
  • "max_budget": 0,
  • "budget_duration": "string",
  • "budget_reset_at": "2019-08-24T14:15:22Z"
}

Response samples

Content type
application/json
{
  • "budget_id": "string",
  • "max_budget": 0,
  • "budget_duration": "string",
  • "budget_reset_at": "string",
  • "created_at": "string",
  • "updated_at": "string"
}
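A budget can then be attached to a user via its budget_id. A creation sketch (the /v1/budgets path is an assumption based on the section grouping; the duration string follows the 'monthly'/'daily' examples above):

```bash
curl --location 'http://localhost:8000/v1/budgets' \
--header 'Content-Type: application/json' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data '{
    "max_budget": 10,
    "budget_duration": "monthly"
}'
```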

List Budgets

List all budgets with pagination.
query Parameters
skip
integer (Skip)
Default: 0
limit
integer (Limit)
Default: 100

Responses

Response samples

Content type
application/json
[
  • {
    }
]

Get Budget

Get details of a specific budget.
path Parameters
budget_id
required
string (Budget Id)

Responses

Response samples

Content type
application/json
{
  • "budget_id": "string",
  • "max_budget": 0,
  • "budget_duration": "string",
  • "budget_reset_at": "string",
  • "created_at": "string",
  • "updated_at": "string"
}

Update Budget

Update a budget.
path Parameters
budget_id
required
string (Budget Id)
Request Body schema: application/json
required
Max Budget (number) or Max Budget (null) (Max Budget)
Budget Duration (string) or Budget Duration (null) (Budget Duration)
Budget Reset At (string) or Budget Reset At (null) (Budget Reset At)

Responses

Request samples

Content type
application/json
{
  • "max_budget": 0,
  • "budget_duration": "string",
  • "budget_reset_at": "2019-08-24T14:15:22Z"
}

Response samples

Content type
application/json
{
  • "budget_id": "string",
  • "max_budget": 0,
  • "budget_duration": "string",
  • "budget_reset_at": "string",
  • "created_at": "string",
  • "updated_at": "string"
}

Delete Budget

Delete a budget.
path Parameters
budget_id
required
string (Budget Id)

Responses

Response samples

Content type
application/json
{
  • "detail": [
    ]
}

pricing

Set Pricing

Set or update pricing for a model.

Request Body schema: application/json
required
model_key
required
string (Model Key)
Model identifier in format 'provider:model'
input_price_per_million
required
number (Input Price Per Million)
Price per 1M input tokens
output_price_per_million
required
number (Output Price Per Million)
Price per 1M output tokens

Responses

Request samples

Content type
application/json
{
  • "model_key": "string",
  • "input_price_per_million": 0,
  • "output_price_per_million": 0
}

Response samples

Content type
application/json
{
  • "model_key": "string",
  • "input_price_per_million": 0,
  • "output_price_per_million": 0,
  • "created_at": "string",
  • "updated_at": "string"
}
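Pricing keys use the same provider:model format as MODEL_NAME in the README. A sketch (the /v1/pricing path is an assumption based on the section grouping; the model key and prices are placeholders):

```bash
curl --location 'http://localhost:8000/v1/pricing' \
--header 'Content-Type: application/json' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data '{
    "model_key": "vertexai:model-name",
    "input_price_per_million": 0.5,
    "output_price_per_million": 1.5
}'
```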

List Pricing

List all model pricing.
query Parameters
skip
integer (Skip)
Default: 0
limit
integer (Limit)
Default: 100

Responses

Response samples

Content type
application/json
[
  • {
    }
]

Get Pricing

Get pricing for a specific model.
path Parameters
model_key
required
string (Model Key)

Responses

Response samples

Content type
application/json
{
  • "model_key": "string",
  • "input_price_per_million": 0,
  • "output_price_per_million": 0,
  • "created_at": "string",
  • "updated_at": "string"
}

Delete Pricing

Delete pricing for a model.
path Parameters
model_key
required
string (Model Key)

Responses

Response samples

Content type
application/json
{
  • "detail": [
    ]
}

health

Health Check

General health check endpoint.

Returns basic health status. For infrastructure monitoring, use /health/readiness or /health/liveness instead.

Responses

Response samples

Content type
application/json
{
  • "property1": "string",
  • "property2": "string"
}

Health Liveness

Liveness probe endpoint.

Simple check to verify the process is alive and responding. Used by Kubernetes/container orchestrators for liveness probes.

Returns: plain text "I'm alive!" message.

Responses

Response samples

Content type
application/json
"string"

Health Readiness

Readiness probe endpoint.

Checks if the gateway is ready to serve requests by validating:

  • Database connectivity
  • Service availability

Used by Kubernetes/container orchestrators for readiness probes. Returns HTTP 503 if any dependency is unavailable.

Returns: dict: Status object with health details.

Raises: HTTPException: 503 if service is not ready.

Responses

Response samples

Content type
application/json
{ }
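MLPA's own /health/readiness probe (src/proxy/core/routers/health/health.py in this PR) calls this endpoint at GATEWAY_READINESS_URL with the master-key header; the direct equivalent:

```bash
curl --location 'http://localhost:8000/health/readiness' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}'
```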
+ diff --git a/gateway_config.yaml b/gateway_config.yaml new file mode 100644 index 0000000..3cdf124 --- /dev/null +++ b/gateway_config.yaml @@ -0,0 +1,14 @@ +database_url: "${DATABASE_URL}" + +host: "0.0.0.0" +port: 8000 + +# Master key for protecting key management endpoints (set via environment variable) +master_key: "${GATEWAY_MASTER_KEY}" + +providers: + # Vertex AI configuration (for Google Cloud) + vertexai: + credentials: "/app/service_account.json" # Path to service account JSON file + project: "fx-gen-ai-sandbox" # GCP project ID + location: "us-south1" # GCP region diff --git a/litellm_config.yaml b/litellm_config.yaml deleted file mode 100644 index c534392..0000000 --- a/litellm_config.yaml +++ /dev/null @@ -1,22 +0,0 @@ -model_list: - - model_name: vertex_ai/qwen/qwen3-235b-a22b-instruct-2507-maas - litellm_params: - model: vertex_ai/qwen/qwen3-235b-a22b-instruct-2507-maas - vertex_project: "fx-gen-ai-sandbox" - vertex_location: "us-south1" - vertex_credentials: "/app/service_account.json" - - model_name: vertex_ai/mistral-small-2503 - litellm_params: - model: vertex_ai/mistral-small-2503 - vertex_project: "fx-gen-ai-sandbox" - vertex_credentials: "/app/service_account.json" - - model_name: openai/gpt-4o - litellm_params: - model: openai/gpt-4o - api_key: os.environ/OPENAI_API_KEY - -general_settings: - store_model_in_db: true - master_key: os.environ/MASTER_KEY - database_url: os.environ/PG_DB_URL - litellm_key_header_name: X-Litellm-Key diff --git a/litellm_docker_compose.yaml b/litellm_docker_compose.yaml deleted file mode 100644 index 10e2f0b..0000000 --- a/litellm_docker_compose.yaml +++ /dev/null @@ -1,42 +0,0 @@ -version: "3.8" - -services: - postgres: - image: postgres:15 - container_name: litellm_postgres - restart: always - environment: - POSTGRES_USER: litellm - POSTGRES_PASSWORD: litellm - POSTGRES_DB: litellm - ports: - - "5432:5432" - volumes: - - pg_data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U litellm -d litellm"] - interval: 5s - timeout: 5s - retries: 5 - - litellm: - image: ghcr.io/berriai/litellm:v1.77.3-stable - container_name: litellm - platform: linux/amd64 - depends_on: - postgres: - condition: service_healthy - ports: - - "4000:4000" - volumes: - - ./litellm_config.yaml:/app/config.yaml - - ./service_account.json:/app/service_account.json - env_file: - - .env - environment: - PG_DB_URL: postgresql://litellm:litellm@postgres:5432/litellm - PORT: 4000 - command: ["--config", "/app/config.yaml"] - -volumes: - pg_data: diff --git a/mlpa_docker_compose.yaml b/mlpa_docker_compose.yaml index 2e53c35..0f7f3fe 100644 --- a/mlpa_docker_compose.yaml +++ b/mlpa_docker_compose.yaml @@ -10,8 +10,8 @@ services: environment: - PG_DB_URL=${PG_DB_URL} - MASTER_KEY=${MASTER_KEY} - - LITELLM_API_BASE=${LITELLM_API_BASE} - - LITELLM_DB_NAME=${LITELLM_DB_NAME} + - GATEWAY_API_BASE=${GATEWAY_API_BASE} + - GATEWAY_DB_NAME=${GATEWAY_DB_NAME} - CHALLENGE_EXPIRY_SECONDS=${CHALLENGE_EXPIRY_SECONDS} - PORT=${PORT} - APP_BUNDLE_ID=${APP_BUNDLE_ID} diff --git a/pyproject.toml b/pyproject.toml index 7dda1da..dbd9b84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "mlpa" version = "0.1.0" -description = "A proxy to verify App Attest/FxA payloads and proxy requests using LiteLLM virtual keys." +description = "A proxy to verify App Attest/FxA payloads and proxy requests using Any-LLM-Gateway virtual keys." 
authors = [{ name = "Noah Podgurski", email = "npodgurski@mozilla.com" }] readme = "README.md" requires-python = ">=3.12" diff --git a/scripts/create-app-attest-database.sh b/scripts/create-app-attest-database.sh index d1afbcd..8e77631 100644 --- a/scripts/create-app-attest-database.sh +++ b/scripts/create-app-attest-database.sh @@ -1 +1 @@ -docker exec -it litellm_postgres psql -U litellm -c "CREATE DATABASE app_attest;" +docker exec -it anyllm_postgres psql -U gateway -c "CREATE DATABASE app_attest;" diff --git a/scripts/create-docs.sh b/scripts/create-docs.sh new file mode 100755 index 0000000..690bbf9 --- /dev/null +++ b/scripts/create-docs.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -e +# set dir to the root of the project +cd "$(dirname "$0")/.." + +OPENAPI_URL="http://localhost:8000/openapi.json" +OUTPUT="docs/index.html" +API_JSON="openapi.json" + +# Optional: fetch the OpenAPI JSON first +curl -sSL "$OPENAPI_URL" -o "$API_JSON" + +# If you have redoc-cli installed, bundle into a standalone HTML +# Ensure you have npm and redoc-cli installed: npm install -g redoc-cli +npx -y @redocly/cli@latest build-docs "$API_JSON" -o "$OUTPUT" + +rm $API_JSON + +echo "Generated $OUTPUT from $OPENAPI_URL" diff --git a/src/proxy/core/completions.py b/src/proxy/core/completions.py index 38c16f4..6c04216 100644 --- a/src/proxy/core/completions.py +++ b/src/proxy/core/completions.py @@ -5,13 +5,13 @@ from fastapi import HTTPException from .classes import AuthorizedChatRequest -from .config import LITELLM_COMPLETIONS_URL, LITELLM_HEADERS +from .config import GATEWAY_COMPLETIONS_URL, GATEWAY_HEADERS from .prometheus_metrics import PrometheusResult, metrics async def stream_completion(authorized_chat_request: AuthorizedChatRequest): """ - Proxies a streaming request to LiteLLM. + Proxies a streaming request to any-llm-gateway. Yields response chunks as they are received and logs metrics. """ start_time = time.time() @@ -31,8 +31,8 @@ async def stream_completion(authorized_chat_request: AuthorizedChatRequest): async with httpx.AsyncClient() as client: async with client.stream( "POST", - LITELLM_COMPLETIONS_URL, - headers=LITELLM_HEADERS, + GATEWAY_COMPLETIONS_URL, + headers=GATEWAY_HEADERS, json=body, timeout=30, ) as response: @@ -66,7 +66,7 @@ async def stream_completion(authorized_chat_request: AuthorizedChatRequest): ) return except Exception as e: - print(f"Failed to proxy request to {LITELLM_COMPLETIONS_URL}: {e}") + print(f"Failed to proxy request to {GATEWAY_COMPLETIONS_URL}: {e}") return finally: metrics.chat_completion_latency.labels(result=result).observe( @@ -76,7 +76,7 @@ async def stream_completion(authorized_chat_request: AuthorizedChatRequest): async def get_completion(authorized_chat_request: AuthorizedChatRequest): """ - Proxies a non-streaming request to LiteLLM. + Proxies a non-streaming request to any-llm-gateway. 
""" start_time = time.time() body = { @@ -92,7 +92,7 @@ async def get_completion(authorized_chat_request: AuthorizedChatRequest): try: async with httpx.AsyncClient() as client: response = await client.post( - LITELLM_COMPLETIONS_URL, headers=LITELLM_HEADERS, json=body, timeout=10 + GATEWAY_COMPLETIONS_URL, headers=GATEWAY_HEADERS, json=body, timeout=10 ) response.raise_for_status() data = response.json() @@ -109,7 +109,7 @@ async def get_completion(authorized_chat_request: AuthorizedChatRequest): raise HTTPException( status_code=500, detail={ - "error": f"Failed to proxy request to {LITELLM_COMPLETIONS_URL}: {e}" + "error": f"Failed to proxy request to {GATEWAY_COMPLETIONS_URL}: {e}" }, ) finally: diff --git a/src/proxy/core/config.py b/src/proxy/core/config.py index d76709b..1fd7982 100644 --- a/src/proxy/core/config.py +++ b/src/proxy/core/config.py @@ -7,13 +7,13 @@ class Env(BaseSettings): METRICS_LOG_FILE: str = "metrics.jsonl" # PostgreSQL url (no /database) - PG_DB_URL: str = "postgresql://litellm:litellm@localhost:5432" + PG_DB_URL: str = "postgresql://gateway:gateway@localhost:5432" - # LiteLLM + # any-llm-gateway MASTER_KEY: str = "sk-default" OPENAI_API_KEY: str = "sk-add-your-key" - LITELLM_API_BASE: str = "http://localhost:4000" - LITELLM_DB_NAME: str = "litellm" + GATEWAY_API_BASE: str = "http://localhost:8000" + GATEWAY_DB_NAME: str = "gateway" CHALLENGE_EXPIRY_SECONDS: int = 300 # 5 minutes PORT: int | None = 8080 @@ -40,9 +40,10 @@ class Env(BaseSettings): env = Env() -LITELLM_READINESS_URL = f"{env.LITELLM_API_BASE}/health/readiness" -LITELLM_COMPLETIONS_URL = f"{env.LITELLM_API_BASE}/v1/chat/completions" -LITELLM_HEADERS = { +GATEWAY_READINESS_URL = f"{env.GATEWAY_API_BASE}/health/readiness" +GATEWAY_COMPLETIONS_URL = f"{env.GATEWAY_API_BASE}/v1/chat/completions" +GATEWAY_USERS_URL = f"{env.GATEWAY_API_BASE}/v1/users" +GATEWAY_HEADERS = { "Content-Type": "application/json", - "X-LiteLLM-Key": f"Bearer {env.MASTER_KEY}", + "X-AnyLLM-Key": f"Bearer {env.MASTER_KEY}", } diff --git a/src/proxy/core/pg_services/gateway_pg_service.py b/src/proxy/core/pg_services/gateway_pg_service.py new file mode 100644 index 0000000..31ded0e --- /dev/null +++ b/src/proxy/core/pg_services/gateway_pg_service.py @@ -0,0 +1,83 @@ +import httpx +from fastapi import Header, HTTPException + +from ..classes import UserUpdatePayload +from ..config import GATEWAY_HEADERS, GATEWAY_USERS_URL, env +from .pg_service import PGService + + +class GatewayPGService(PGService): + """ + Service for interacting with any-llm-gateway's database and API. + Uses REST API calls to manage users instead of direct database access. + """ + + def __init__(self): + super().__init__(env.GATEWAY_DB_NAME) + + async def get_user(self, user_id: str): + """Get user via any-llm-gateway REST API""" + async with httpx.AsyncClient() as client: + try: + response = await client.get( + f"{GATEWAY_USERS_URL}/{user_id}", + headers=GATEWAY_HEADERS, + ) + if response.status_code == 404: + return None + response.raise_for_status() + return response.json() + except Exception as e: + raise HTTPException( + status_code=500, detail={"error": f"Error fetching user: {e}"} + ) + + async def update_user( + self, request: UserUpdatePayload, master_key: str = Header(...) 
+ ): + """ + Update user via any-llm-gateway REST API + example POST body: { + "user_id": "test-user-32", + "blocked": false, + "budget_id": null, + "alias": null + } + """ + if master_key != f"Bearer {env.MASTER_KEY}": + raise HTTPException(status_code=401, detail={"error": "Unauthorized"}) + + update_data = request.model_dump(exclude_unset=True) + user_id = update_data.pop("user_id", request.user_id) + + if not update_data: + return {"status": "no fields to update", "user_id": user_id} + + async with httpx.AsyncClient() as client: + try: + response = await client.patch( + f"{GATEWAY_USERS_URL}/{user_id}", + headers=GATEWAY_HEADERS, + json=update_data, + ) + if response.status_code == 404: + raise HTTPException( + status_code=404, + detail=f"User with user_id '{user_id}' not found.", + ) + response.raise_for_status() + return response.json() + except httpx.HTTPStatusError as e: + if e.response.status_code == 404: + raise HTTPException( + status_code=404, + detail=f"User with user_id '{user_id}' not found.", + ) + raise HTTPException( + status_code=e.response.status_code, + detail={"error": f"Error updating user: {e}"}, + ) + except Exception as e: + raise HTTPException( + status_code=500, detail={"error": f"Error updating user: {e}"} + ) diff --git a/src/proxy/core/pg_services/litellm_pg_service.py b/src/proxy/core/pg_services/litellm_pg_service.py deleted file mode 100644 index 9a1381d..0000000 --- a/src/proxy/core/pg_services/litellm_pg_service.py +++ /dev/null @@ -1,64 +0,0 @@ -from fastapi import Header, HTTPException - -from ..classes import UserUpdatePayload -from ..config import env -from .pg_service import PGService - - -class LiteLLMPGService(PGService): - """ - This service is primarily intended for updating user fields (directly via the DB) that are not supported by the free tier of LiteLLM. - """ - - def __init__(self): - super().__init__(env.LITELLM_DB_NAME) - - async def get_user(self, user_id: str): - query = 'SELECT * FROM "LiteLLM_EndUserTable" WHERE user_id = $1' - user = await self.pg.fetchrow(query, user_id) - return dict(user) if user else None - - async def update_user( - self, request: UserUpdatePayload, master_key: str = Header(...) - ): - """ - Allow updating the user's (End User/Customer)'s information - Free tier of LiteLLM does not support this, so updating the DB directly - is a workaround. - example POST body: { - "user_id": "test-user-32", - "blocked": false, - "budget_id": null, - "alias": null - } - """ - if master_key != f"Bearer {env.MASTER_KEY}": - raise HTTPException(status_code=401, detail={"error": "Unauthorized"}) - - update_data = request.model_dump(exclude_unset=True) - user_id = update_data.pop("user_id", request.user_id) - - if not update_data: - return {"status": "no fields to update", "user_id": user_id} - - updated_user_record = None - try: - set_clause = ", ".join( - [f'"{key}" = ${i + 1}' for i, key in enumerate(update_data.keys())] - ) - values = list(update_data.values()) - where_value_index = len(values) + 1 - - query = f'UPDATE "LiteLLM_EndUserTable" SET {set_clause} WHERE user_id = ${where_value_index} RETURNING *' - updated_user_record = await self.pg.fetchrow(query, *values, user_id) - except Exception as e: - raise HTTPException( - status_code=500, detail={"error": f"Error updating user: {e}"} - ) - - if updated_user_record is None: - raise HTTPException( - status_code=404, detail=f"User with user_id '{user_id}' not found." 
- ) - - return dict(updated_user_record) diff --git a/src/proxy/core/pg_services/services.py b/src/proxy/core/pg_services/services.py index 82d0dc0..5cea005 100644 --- a/src/proxy/core/pg_services/services.py +++ b/src/proxy/core/pg_services/services.py @@ -1,5 +1,5 @@ from .app_attest_pg_service import AppAttestPGService -from .litellm_pg_service import LiteLLMPGService +from .gateway_pg_service import GatewayPGService -litellm_pg = LiteLLMPGService() +gateway_pg = GatewayPGService() app_attest_pg = AppAttestPGService() diff --git a/src/proxy/core/routers/health/health.py b/src/proxy/core/routers/health/health.py index 24e0b58..03ea884 100644 --- a/src/proxy/core/routers/health/health.py +++ b/src/proxy/core/routers/health/health.py @@ -1,8 +1,8 @@ import httpx from fastapi import APIRouter -from ...config import LITELLM_HEADERS, LITELLM_READINESS_URL -from ...pg_services.services import app_attest_pg, litellm_pg +from ...config import GATEWAY_HEADERS, GATEWAY_READINESS_URL +from ...pg_services.services import app_attest_pg, gateway_pg router = APIRouter() @@ -14,21 +14,20 @@ async def liveness_probe(): @router.get("/readiness", tags=["Health"]) async def readiness_probe(): - # todo add check to PG and LiteLLM status here - pg_status = litellm_pg.check_status() + pg_status = gateway_pg.check_status() app_attest_pg_status = app_attest_pg.check_status() - litellm_status = {} + gateway_status = {} async with httpx.AsyncClient() as client: response = await client.get( - LITELLM_READINESS_URL, headers=LITELLM_HEADERS, timeout=3 + GATEWAY_READINESS_URL, headers=GATEWAY_HEADERS, timeout=3 ) data = response.json() - litellm_status = data + gateway_status = data return { "status": "connected", "pg_server_dbs": { "postgres": "connected" if pg_status else "offline", "app_attest": "connected" if app_attest_pg_status else "offline", }, - "litellm": litellm_status, + "any_llm_gateway": gateway_status, } diff --git a/src/proxy/core/routers/user/user.py b/src/proxy/core/routers/user/user.py index 80a499e..e3d0807 100644 --- a/src/proxy/core/routers/user/user.py +++ b/src/proxy/core/routers/user/user.py @@ -1,7 +1,7 @@ import httpx from fastapi import APIRouter, HTTPException -from ...config import LITELLM_HEADERS, env +from ...config import GATEWAY_HEADERS, GATEWAY_USERS_URL router = APIRouter() @@ -12,14 +12,15 @@ async def user_info(user_id: str): raise HTTPException(status_code=400, detail="Missing user_id") async with httpx.AsyncClient() as client: - params = {"end_user_id": user_id} response = await client.get( - f"{env.LITELLM_API_BASE}/customer/info", - params=params, - headers=LITELLM_HEADERS, + f"{GATEWAY_USERS_URL}/{user_id}", + headers=GATEWAY_HEADERS, ) + + if response.status_code == 404: + raise HTTPException(status_code=404, detail="User not found") + + response.raise_for_status() user = response.json() - if not user: - raise HTTPException(status_code=404, detail="User not found") return user diff --git a/src/proxy/core/utils.py b/src/proxy/core/utils.py index 476dab8..f5cc68d 100644 --- a/src/proxy/core/utils.py +++ b/src/proxy/core/utils.py @@ -3,11 +3,11 @@ import httpx from fastapi import HTTPException -from .config import LITELLM_HEADERS, env +from .config import GATEWAY_HEADERS, GATEWAY_USERS_URL async def get_or_create_user(user_id: str): - """Returns user info from LiteLLM, creating the user if they don't exist. + """Returns user info from any-llm-gateway, creating the user if they don't exist. Args: user_id (str): The user ID to look up or create. 
Returns: @@ -16,27 +16,28 @@ async def get_or_create_user(user_id: str): async with httpx.AsyncClient() as client: try: - params = {"end_user_id": user_id} response = await client.get( - f"{env.LITELLM_API_BASE}/customer/info", - params=params, - headers=LITELLM_HEADERS, + f"{GATEWAY_USERS_URL}/{user_id}", + headers=GATEWAY_HEADERS, ) - user = response.json() - if not user.get("user_id"): - # add budget details or budget_id if necessary - await client.post( - f"{env.LITELLM_API_BASE}/customer/new", + if response.status_code == 200: + return [response.json(), False] + + if response.status_code == 404: + create_response = await client.post( + GATEWAY_USERS_URL, json={"user_id": user_id}, - headers=LITELLM_HEADERS, - ) - response = await client.get( - f"{env.LITELLM_API_BASE}/customer/info", - params=params, - headers=LITELLM_HEADERS, + headers=GATEWAY_HEADERS, ) - return [response.json(), True] - return [user, False] + create_response.raise_for_status() + return [create_response.json(), True] + response.raise_for_status() + return [response.json(), False] + except httpx.HTTPStatusError as e: + raise HTTPException( + status_code=e.response.status_code, + detail={"error": f"Error fetching user info: {e}"}, + ) except Exception as e: raise HTTPException( status_code=500, detail={"error": f"Error fetching user info: {e}"} diff --git a/src/proxy/run.py b/src/proxy/run.py index 55b68c5..e28646b 100644 --- a/src/proxy/run.py +++ b/src/proxy/run.py @@ -11,7 +11,7 @@ from .core.classes import AssertionRequest, AuthorizedChatRequest, ChatRequest from .core.completions import get_completion, stream_completion from .core.config import env -from .core.pg_services.services import app_attest_pg, litellm_pg +from .core.pg_services.services import app_attest_pg, gateway_pg from .core.prometheus_metrics import metrics from .core.routers.appattest import app_attest_auth, appattest_router from .core.routers.fxa import fxa_auth, fxa_router @@ -26,7 +26,10 @@ "name": "App Attest", "description": "Endpoints for verifying App Attest payloads.", }, - {"name": "LiteLLM", "description": "Endpoints for interacting with LiteLLM."}, + { + "name": "Gateway", + "description": "Endpoints for interacting with any-llm-gateway.", + }, ] @@ -61,10 +64,10 @@ async def authorize( @asynccontextmanager async def lifespan(app: FastAPI): - await litellm_pg.connect() + await gateway_pg.connect() await app_attest_pg.connect() yield - await litellm_pg.disconnect() + await gateway_pg.disconnect() await app_attest_pg.disconnect() @@ -72,7 +75,7 @@ async def lifespan(app: FastAPI): app = FastAPI( title="MLPA", - description="A proxy to verify App Attest/FxA payloads and proxy requests through LiteLLM.", + description="A proxy to verify App Attest/FxA payloads and proxy requests through any-llm-gateway.", version="1.0.0", docs_url="/api/docs", openapi_tags=tags_metadata, @@ -118,7 +121,7 @@ async def get_metrics(): @app.post( "/v1/chat/completions", - tags=["LiteLLM"], + tags=["Gateway"], description="Authorize first using App Attest or FxA. Either pass the x-fxa-authorization header or include the `{key_id, challenge, and assertion_obj}` in the request body for app attest authorization. 
`payload` is always required and contains the prompt.", ) async def chat_completion( diff --git a/src/proxy/test.py b/src/proxy/test.py index ff2c57e..6cbd573 100644 --- a/src/proxy/test.py +++ b/src/proxy/test.py @@ -1,18 +1,8 @@ """ -Simulate projected load - ---- Run this in DB to fill DB with 1M users --- -DELETE FROM "LiteLLM_EndUserTable"; -WITH RECURSIVE generate_series(id) AS ( - SELECT 0 - UNION ALL - SELECT id + 1 FROM generate_series WHERE id < 1000000 -) -INSERT INTO "LiteLLM_EndUserTable" ("user_id") -SELECT - 'test-user-' || id AS "user_id" -FROM generate_series; -SELECT * FROM public."LiteLLM_EndUserTable" +Generate 1M users using any-llm-gateway API + +This script creates 1 million users using the any-llm-gateway API +with async batching for efficient user creation. """ import asyncio @@ -21,6 +11,7 @@ import random import time import uuid +from typing import List, Tuple import httpx import jwt @@ -30,19 +21,130 @@ load_dotenv() -# Projected total load +TOTAL_USERS = 1_000_000 +BATCH_SIZE = 10 # Number of users to create in parallel +PROXY_API_BASE = os.getenv("GATEWAY_API_BASE", "http://localhost:8000") +GATEWAY_MASTER_KEY = os.getenv("GATEWAY_MASTER_KEY") USERS = 2_500_000 REQ_PER_MINUTE = 465 -# REQ_PER_SECOND = REQ_PER_MINUTE / 60 REQ_PER_SECOND = 50 - -PROXY_API_BASE = "http://localhost:8080" JWT_SECRET = os.getenv("JWT_SECRET") +async def create_user_batch( + user_ids: List[str], client: httpx.AsyncClient +) -> Tuple[int, int]: + """ + Create a batch of users using the any-llm-gateway API. + + Args: + user_ids: List of user IDs to create + client: HTTP client for making requests + + Returns: + Tuple of (successful_creations, failed_creations) + """ + if not GATEWAY_MASTER_KEY: + raise ValueError("GATEWAY_MASTER_KEY environment variable is required") + + headers = { + "X-AnyLLM-Key": f"Bearer {GATEWAY_MASTER_KEY}", + "Content-Type": "application/json", + } + + success_count = 0 + failed_count = 0 + + tasks = [] + for user_id in user_ids: + payload = { + "user_id": user_id, + "alias": f"Test User {user_id}", + "blocked": False, + "metadata": {"created_by": "bulk_script", "batch_id": str(uuid.uuid4())}, + } + + task = client.post(f"{PROXY_API_BASE}/v1/users", json=payload, headers=headers) + tasks.append(task) + + # Wait for all requests in the batch to complete + responses = await asyncio.gather(*tasks, return_exceptions=True) + + for response in responses: + if isinstance(response, Exception): + failed_count += 1 + elif hasattr(response, "status_code"): + if response.status_code == 201: + success_count += 1 + elif response.status_code == 409: + # User already exists, count as success + success_count += 1 + else: + failed_count += 1 + else: + failed_count += 1 + + return success_count, failed_count + + +async def generate_users(): + """ + Generate 1M users using the any-llm-gateway API with async batching. 
+ """ + if not GATEWAY_MASTER_KEY: + print("Error: GATEWAY_MASTER_KEY environment variable is required") + return + + print(f"Starting to create {TOTAL_USERS:,} users...") + print(f"Gateway URL: {PROXY_API_BASE}") + print(f"Batch size: {BATCH_SIZE}") + print() + + start_time = time.time() + total_success = 0 + total_failed = 0 + + async with httpx.AsyncClient(timeout=30.0) as client: + with tqdm.tqdm(total=TOTAL_USERS, desc="Creating users", unit="users") as pbar: + for batch_start in range(0, TOTAL_USERS, BATCH_SIZE): + batch_end = min(batch_start + BATCH_SIZE, TOTAL_USERS) + user_ids = [f"test-user-{i}" for i in range(batch_start, batch_end)] + + try: + success, failed = await create_user_batch(user_ids, client) + total_success += success + total_failed += failed + + pbar.update(len(user_ids)) + pbar.set_postfix( + { + "Success": f"{total_success:,}", + "Failed": f"{total_failed:,}", + "Rate": f"{total_success / (time.time() - start_time):.1f}/s", + } + ) + + except Exception as e: + print(f"Error in batch {batch_start}-{batch_end}: {e}") + total_failed += len(user_ids) + pbar.update(len(user_ids)) + + end_time = time.time() + duration = end_time - start_time + + print(f"\nUser creation completed!") + print(f"Total users created: {total_success:,}") + print(f"Total failures: {total_failed:,}") + print(f"Duration: {duration:.2f} seconds") + print(f"Average rate: {total_success / duration:.1f} users/second") + print( + f"Success rate: {(total_success / (total_success + total_failed)) * 100:.1f}%" + ) + + class User: - def __init__(self, id: str = None): - self.id = id or str(uuid.uuid4()) + def __init__(self, user_id: str = None): + self.id = user_id or str(uuid.uuid4()) self.stats = {} self.key = jwt.encode({"user_id": self.id}, JWT_SECRET, algorithm="HS256") @@ -79,8 +181,8 @@ async def test_server_rps_limit(max_rps=8, test_duration=10): """ Test the maximum requests per second (RPS) the server can handle. Args: - max_rps (int): Maximum RPS to test. - test_duration (int): Duration of the test in seconds. + max_rps (int): Maximum RPS to test. + test_duration (int): Duration of the test in seconds. 
""" users = [User(f"test-user-{i}") for i in range(USERS)] random.shuffle(users) @@ -112,7 +214,7 @@ async def test_server_rps_limit(max_rps=8, test_duration=10): def calculate_metric_stats(): - with open("metrics.jsonl", "r") as f: + with open("metrics.jsonl", "r", encoding="utf-8") as f: data = [json.loads(line) for line in f.readlines()] metrics = [ @@ -152,5 +254,22 @@ def calculate_metric_stats(): if __name__ == "__main__": - asyncio.run(test_server_rps_limit(5, 20)) - calculate_metric_stats() + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "generate-users": + # Generate 1M users using any-llm-gateway API + asyncio.run(generate_users()) + elif len(sys.argv) > 1 and sys.argv[1] == "test-rps": + # Run the original RPS test + asyncio.run(test_server_rps_limit(5, 20)) + calculate_metric_stats() + else: + print("Usage:") + print( + " python test.py generate-users # Generate 1M users via any-llm-gateway API" + ) + print(" python test.py test-rps # Run RPS test") + print() + print("Environment variables required for generate-users:") + print(" GATEWAY_MASTER_KEY - Master key for any-llm-gateway") + print(" PROXY_API_BASE - Gateway URL (default: http://localhost:8000)") diff --git a/src/tests/integration/conftest.py b/src/tests/integration/conftest.py index 8f7b40b..22c01ce 100644 --- a/src/tests/integration/conftest.py +++ b/src/tests/integration/conftest.py @@ -5,7 +5,7 @@ from tests.mocks import ( MockAppAttestPGService, MockFxAService, - MockLiteLLMPGService, + MockGatewayPGService, mock_get_completion, mock_get_or_create_user, mock_verify_assert, @@ -18,7 +18,7 @@ def mocked_client_integration(mocker): This fixture mocks the database services and provides a TestClient. """ mock_app_attest_pg = MockAppAttestPGService() - mock_litellm_pg = MockLiteLLMPGService() + mock_gateway_pg = MockGatewayPGService() mock_fxa_client = MockFxAService( "test-client-id", "test-client-secret", "https://test-fxa.com" ) @@ -31,8 +31,8 @@ def mocked_client_integration(mocker): mocker.patch( "proxy.core.routers.appattest.appattest.app_attest_pg", mock_app_attest_pg ) - mocker.patch("proxy.run.litellm_pg", mock_litellm_pg) - mocker.patch("proxy.core.routers.health.health.litellm_pg", mock_litellm_pg) + mocker.patch("proxy.run.gateway_pg", mock_gateway_pg) + mocker.patch("proxy.core.routers.health.health.gateway_pg", mock_gateway_pg) mocker.patch("proxy.core.routers.fxa.fxa.client", mock_fxa_client) @@ -44,7 +44,7 @@ def mocked_client_integration(mocker): mocker.patch( "proxy.run.get_or_create_user", lambda *args, **kwargs: mock_get_or_create_user( - mock_litellm_pg, *args, **kwargs + mock_gateway_pg, *args, **kwargs ), ) mocker.patch( diff --git a/src/tests/integration/test_health.py b/src/tests/integration/test_health.py index d677475..7136334 100644 --- a/src/tests/integration/test_health.py +++ b/src/tests/integration/test_health.py @@ -10,28 +10,19 @@ def test_health_liveness(mocked_client_integration, httpx_mock): def test_health_readiness(mocked_client_integration, httpx_mock): httpx_mock.add_response( method="GET", - url=f"{env.LITELLM_API_BASE}/health/readiness", + url=f"{env.GATEWAY_API_BASE}/health/readiness", status_code=200, json={ "status": "connected", "pg_server_dbs": {"postgres": "connected", "app_attest": "connected"}, - "litellm": { + "any_llm_gateway": { "status": "connected", - "db": "connected", - "cache": None, - "litellm_version": "1.77.3", - "success_callbacks": [ - "sync_deployment_callback_on_success", - "_PROXY_VirtualKeyModelMaxBudgetLimiter", - "_ProxyDBLogger", - 
"_PROXY_MaxBudgetLimiter", - "_PROXY_MaxParallelRequestsHandler_v3", - "_PROXY_CacheControlCheck", - "_PROXY_LiteLLMManagedFiles", - "ServiceLogging", - ], - "use_aiohttp_transport": True, - "last_updated": "2025-10-10T00:00:00", + "pg_server_dbs": {"postgres": "connected", "app_attest": "connected"}, + "anyllm_gateway": { + "status": "healthy", + "database": "connected", + "version": "0.1.0", + }, }, }, ) @@ -40,7 +31,7 @@ def test_health_readiness(mocked_client_integration, httpx_mock): assert readiness_response.status_code == 200 assert readiness_response.json().get("status") == "connected" assert readiness_response.json().get("pg_server_dbs") is not None - assert readiness_response.json().get("litellm") is not None + assert readiness_response.json().get("any_llm_gateway") is not None def test_metrics_endpoint(mocked_client_integration): diff --git a/src/tests/integration/test_user.py b/src/tests/integration/test_user.py index c25f58a..c44d3b8 100644 --- a/src/tests/integration/test_user.py +++ b/src/tests/integration/test_user.py @@ -19,8 +19,8 @@ def test_user_info_wrong_params(mocked_client_integration): def test_user_info_endpoint_for_missing_user(mocked_client_integration, httpx_mock): httpx_mock.add_response( method="GET", - url=f"{env.LITELLM_API_BASE}/customer/info?end_user_id={TEST_USER_ID}", - status_code=200, + url=f"{env.GATEWAY_API_BASE}/v1/users/{TEST_USER_ID}", + status_code=404, json={"detail": "User not found"}, ) @@ -28,14 +28,14 @@ def test_user_info_endpoint_for_missing_user(mocked_client_integration, httpx_mo f"/user/{TEST_USER_ID}", headers={"x-fxa-authorization": "Bearer " + TEST_FXA_TOKEN}, ) + assert response.status_code == 404 assert response.json() == {"detail": "User not found"} - assert response.status_code == 200 # litellm returns 200 even if user not found def test_user_info_endpoint_for_existing_user(mocked_client_integration, httpx_mock): httpx_mock.add_response( method="GET", - url=f"{env.LITELLM_API_BASE}/customer/info?end_user_id={TEST_USER_ID}", + url=f"{env.GATEWAY_API_BASE}/v1/users/{TEST_USER_ID}", status_code=200, json={ "user_id": TEST_USER_ID, diff --git a/src/tests/mocks.py b/src/tests/mocks.py index 7479ff4..e3b5c56 100644 --- a/src/tests/mocks.py +++ b/src/tests/mocks.py @@ -12,11 +12,11 @@ async def mock_verify_assert(key_id, assertion_obj, payload: ChatRequest): return {"status": "success"} -async def mock_get_or_create_user(mock_litellm_pg, user_id: str): - user = await mock_litellm_pg.get_user(user_id) +async def mock_get_or_create_user(mock_gateway_pg, user_id: str): + user = await mock_gateway_pg.get_user(user_id) if not user: - await mock_litellm_pg.store_user(user_id, {"data": "testdata"}) - user = await mock_litellm_pg.get_user(user_id) + await mock_gateway_pg.store_user(user_id, {"data": "testdata"}) + user = await mock_gateway_pg.get_user(user_id) return user, True return [{"user_id": user_id, "data": "testdata"}, False] @@ -68,10 +68,10 @@ async def delete_key(self, key_id: str): del self.keys[key_id] -class MockLiteLLMPGService: +class MockGatewayPGService: def __init__(self): self.db_name = "test" - self.db_url = "test_litellm" + self.db_url = "test_gateway" self.connected = True self.users = {} diff --git a/src/tests/unit/test_completions.py b/src/tests/unit/test_completions.py index 02bd23b..3c6fdb9 100644 --- a/src/tests/unit/test_completions.py +++ b/src/tests/unit/test_completions.py @@ -7,7 +7,7 @@ from pytest_httpx import HTTPXMock, IteratorStream from proxy.core.completions import get_completion, stream_completion -from 
proxy.core.config import LITELLM_COMPLETIONS_URL +from proxy.core.config import GATEWAY_COMPLETIONS_URL from proxy.core.prometheus_metrics import PrometheusResult from tests.consts import SAMPLE_REQUEST, SUCCESSFUL_CHAT_RESPONSE @@ -150,7 +150,7 @@ async def test_stream_completion_success(httpx_mock: HTTPXMock, mocker): # 2. Use pytest-httpx to mock the response for the correct URL and method httpx_mock.add_response( method="POST", - url=LITELLM_COMPLETIONS_URL, + url=GATEWAY_COMPLETIONS_URL, stream=IteratorStream(mock_chunks), status_code=200, ) @@ -175,7 +175,7 @@ async def test_stream_completion_success(httpx_mock: HTTPXMock, mocker): assert request is not None request_body = json.loads(request.content) assert request_body["stream"] is True - assert request_body["user"] == "test-user-123" + assert request_body["user"] == SAMPLE_REQUEST.user assert request_body["model"] == "test-model" # 3. Verify TTFT metric was observed