diff --git a/.gitignore b/.gitignore index 002274a..840090d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ service_account.json # Since we are running it as a library, better not to commit the lock file uv.lock .secrets +openapi.json diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f49ae5a..dc24749 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,16 +25,16 @@ Ensure all tests pass: `pytest -v` ## Local Build for QA and manual testing -1. Use `litellm_docker_compose.yaml` to start LiteLLM and Postgres locally: +1. Use `anyllm_docker_compose.yaml` to start Any-LLM-Gateway and Postgres locally: ```bash - docker compose -f litellm_docker_compose.yaml up -d + docker compose -f anyllm_docker_compose.yaml up -d ``` or if you are using legacy docker-compose: ```bash - docker-compose -f litellm_docker_compose.yaml up -d + docker-compose -f anyllm_docker_compose.yaml up -d ``` 2. Create a second database that is needed for authentication @@ -43,7 +43,7 @@ or if you are using legacy docker-compose: bash scripts/create-app-attest-database.sh ``` -LiteLLM will be accessible at `localhost:4000` and `localhost:4000/ui`. +Any-LLM-Gateway will be accessible at `localhost:8000` and `localhost:8000/ui`. 3. Run MLPA with @@ -54,7 +54,7 @@ LiteLLM will be accessible at `localhost:4000` and `localhost:4000/ui`. 4. Stop the service with ```bash -docker compose -f litellm_docker_compose.yaml down +docker compose -f anyllm_docker_compose.yaml down ``` ### Useful CURLs for QA @@ -79,7 +79,7 @@ curl --location 'http://0.0.0.0:8080/health/readiness' \ curl --location 'http://0.0.0.0:8080/v1/chat/completions' \ --header 'Content-Type: application/json' \ --header 'x-fxa-authorization: Bearer {YOUR_MOZILLA_FXA_TOKEN}' \ - --header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \ + --header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \ --data '{ "model": "openai/gpt-4o", "messages": [{ @@ -89,7 +89,7 @@ curl --location 'http://0.0.0.0:8080/health/readiness' \ }' ``` -1. 
LiteLLM liveness: +1. Any-LLM-Gateway liveness: ```bash curl --location 'http://localhost:4000/health/liveness' \ @@ -101,16 +101,16 @@ curl --location 'http://localhost:4000/health/liveness' \ ```bash curl --location 'http://localhost:4000/models' \ --header 'Content-Type: application/json' \ ---header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \ +--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \ --data '' ``` -1. Completion directly from LiteLLM: +1. Completion directly from Any-LLM-Gateway: ```bash curl --location 'http://localhost:4000/v1/chat/completions' \ --header 'Content-Type: application/json' \ ---header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \ +--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \ --data '{ "model": "openai/gpt-4o", "messages": [ diff --git a/README.md b/README.md index 6b87638..f0fad97 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Mozilla LLM Proxy Auth (MLPA) -A proxy to verify App Attest/FxA payloads and proxy requests through LiteLLM to enact budgets and per user management. +A proxy to verify App Attest/FxA payloads and proxy requests through any-llm-gateway to enact budgets and per user management. ## Setup @@ -12,9 +12,13 @@ This creates a virtual environment in `.venv/`, installs dependencies, and insta ## Running MLPA locally with Docker -### Run LiteLLM +### Run Any-LLM-Gateway -`docker compose -f litellm_docker_compose.yaml up -d` +The any-llm-gateway image requires authentication to pull: see [github docs](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry#authenticating-with-a-personal-access-token-classic) for help with creating a PAT and authenticating docker to the registry. +```bash +echo $GITHUB_PAT | docker login ghcr.io -u USERNAME --password-stdin # The command to authenticate docker with ghcr +docker compose -f anyllm_docker_compose.yaml up -d +``` ### Run MLPA @@ -30,14 +34,14 @@ pip install --no-cache-dir -e . 
mlpa ``` -## Config (see [LiteLLM Documentation](https://docs.litellm.ai/docs/simple_proxy_old_doc) for more config options) +## Config `.env` (see `config.py` for all configuration variables) ``` MASTER_KEY="sk-1234..." -LITELLM_API_BASE="http://mlpa:4000" -DATABASE_URL=postgresql://... # required for direct user editing in SQL +GATEWAY_API_BASE="http://any-llm-gateway:8000" +DATABASE_URL=postgresql://gateway:gateway@postgres:5432 CHALLENGE_EXPIRY_SECONDS=300 PORT=8080 @@ -47,12 +51,14 @@ APP_DEVELOPMENT_TEAM="12BC943KDC" CLIENT_ID="..." CLIENT_SECRET="..." -MODEL_NAME="" +MODEL_NAME="vertexai:model-name" # Use provider:model format TEMPERATURE=0.1 TOP_P=0.01 ``` -### Also See `litellm_config.yaml` for litellm config +### Gateway Configuration + +See `gateway_config.yaml` for any-llm-gateway configuration. Service account configured to hit VertexAI: `service_account.json` should be in directory root diff --git a/anyllm_docker_compose.yaml b/anyllm_docker_compose.yaml new file mode 100644 index 0000000..b961936 --- /dev/null +++ b/anyllm_docker_compose.yaml @@ -0,0 +1,47 @@ +services: + postgres: + image: postgres:16-alpine + container_name: anyllm_postgres + restart: always + environment: + POSTGRES_USER: gateway + POSTGRES_PASSWORD: gateway + POSTGRES_DB: gateway + ports: + - "5432:5432" + volumes: + - pg_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U gateway -d gateway"] + interval: 5s + timeout: 5s + retries: 5 + networks: + - gateway-network + + any-llm-gateway: + image: ghcr.io/mozilla-ai/any-llm-gateway:main + container_name: any_llm_gateway + platform: linux/amd64 + depends_on: + postgres: + condition: service_healthy + ports: + - "8000:8000" + volumes: + - ./gateway_config.yaml:/app/config.yaml + - ./service_account.json:/app/service_account.json + environment: + - DATABASE_URL=postgresql://gateway:gateway@postgres:5432/gateway + - GATEWAY_MASTER_KEY=${MASTER_KEY} + restart: unless-stopped + networks: + - gateway-network 
+ +volumes: + pg_data: + +networks: + gateway-network: + name: mlpa-network + driver: bridge diff --git a/docs/index.html b/docs/index.html index 40af97a..2871919 100644 --- a/docs/index.html +++ b/docs/index.html @@ -3,7 +3,7 @@
-Download OpenAPI specification:
A clean FastAPI gateway for any-llm with API key management
+OpenAI-compatible chat completions endpoint.
+Supports both streaming and non-streaming responses. +Handles reasoning content from any-llm providers.
+Authentication modes:
+| model required | string (Model) |
required | Array of objects (Messages) |
User (string) or User (null) (User) | |
Temperature (number) or Temperature (null) (Temperature) | |
Max Tokens (integer) or Max Tokens (null) (Max Tokens) | |
Top P (number) or Top P (null) (Top P) | |
| stream | boolean (Stream) Default: false |
Array of Tools (objects) or Tools (null) (Tools) | |
Tool Choice (string) or Tool Choice (object) or Tool Choice (null) (Tool Choice) | |
Response Format (object) or Response Format (null) (Response Format) |
{- "model": "string",
- "messages": [
- { }
], - "user": "string",
- "temperature": 0,
- "max_tokens": 0,
- "top_p": 0,
- "stream": false,
- "tools": [
- { }
], - "tool_choice": "string",
- "response_format": { }
}nullCreate a new API key.
+Requires master key authentication.
+If user_id is provided, the key will be associated with that user (the user is created if it doesn't exist yet). +If user_id is not provided, a new user will be created automatically and the key will be associated with it.</p>
+Key Name (string) or Key Name (null) (Key Name) Optional name for the key + | |
User Id (string) or User Id (null) (User Id) Optional user ID to associate with this key + | |
Expires At (string) or Expires At (null) (Expires At) Optional expiration timestamp + | |
object (Metadata) Optional metadata + |
{- "key_name": "string",
- "user_id": "string",
- "expires_at": "2019-08-24T14:15:22Z",
- "metadata": { }
}{- "id": "string",
- "key": "string",
- "key_name": "string",
- "user_id": "string",
- "created_at": "string",
- "expires_at": "string",
- "is_active": true,
- "metadata": { }
}List all API keys.
+Requires master key authentication.
+| skip | integer (Skip) Default: 0 |
| limit | integer (Limit) Default: 100 |
[- {
- "id": "string",
- "key_name": "string",
- "user_id": "string",
- "created_at": "string",
- "last_used_at": "string",
- "expires_at": "string",
- "is_active": true,
- "metadata": { }
}
]Get details of a specific API key.
+Requires master key authentication.
+| key_id required | string (Key Id) |
{- "id": "string",
- "key_name": "string",
- "user_id": "string",
- "created_at": "string",
- "last_used_at": "string",
- "expires_at": "string",
- "is_active": true,
- "metadata": { }
}Update an API key.
+Requires master key authentication.
+| key_id required | string (Key Id) |
Key Name (string) or Key Name (null) (Key Name) | |
Is Active (boolean) or Is Active (null) (Is Active) | |
Expires At (string) or Expires At (null) (Expires At) | |
Metadata (object) or Metadata (null) (Metadata) |
{- "key_name": "string",
- "is_active": true,
- "expires_at": "2019-08-24T14:15:22Z",
- "metadata": { }
}{- "id": "string",
- "key_name": "string",
- "user_id": "string",
- "created_at": "string",
- "last_used_at": "string",
- "expires_at": "string",
- "is_active": true,
- "metadata": { }
}Create a new user.
+| user_id required | string (User Id) Unique user identifier + |
Alias (string) or Alias (null) (Alias) Optional admin-facing alias + | |
Budget Id (string) or Budget Id (null) (Budget Id) Optional budget ID + | |
| blocked | boolean (Blocked) Default: false Whether user is blocked + |
object (Metadata) Optional metadata + |
{- "user_id": "string",
- "alias": "string",
- "budget_id": "string",
- "blocked": false,
- "metadata": { }
}{- "user_id": "string",
- "alias": "string",
- "spend": 0,
- "budget_id": "string",
- "blocked": true,
- "created_at": "string",
- "updated_at": "string",
- "metadata": { }
}List all users with pagination.
+| skip | integer (Skip) Default: 0 |
| limit | integer (Limit) Default: 100 |
[- {
- "user_id": "string",
- "alias": "string",
- "spend": 0,
- "budget_id": "string",
- "blocked": true,
- "created_at": "string",
- "updated_at": "string",
- "metadata": { }
}
]{- "user_id": "string",
- "alias": "string",
- "spend": 0,
- "budget_id": "string",
- "blocked": true,
- "created_at": "string",
- "updated_at": "string",
- "metadata": { }
}Update a user.
+| user_id required | string (User Id) |
Alias (string) or Alias (null) (Alias) | |
Budget Id (string) or Budget Id (null) (Budget Id) | |
Blocked (boolean) or Blocked (null) (Blocked) | |
Metadata (object) or Metadata (null) (Metadata) |
{- "alias": "string",
- "budget_id": "string",
- "blocked": true,
- "metadata": { }
}{- "user_id": "string",
- "alias": "string",
- "spend": 0,
- "budget_id": "string",
- "blocked": true,
- "created_at": "string",
- "updated_at": "string",
- "metadata": { }
}Get usage history for a specific user.
+| user_id required | string (User Id) |
| skip | integer (Skip) Default: 0 |
| limit | integer (Limit) Default: 100 |
[- {
- "id": "string",
- "user_id": "string",
- "api_key_id": "string",
- "timestamp": "string",
- "model": "string",
- "provider": "string",
- "endpoint": "string",
- "prompt_tokens": 0,
- "completion_tokens": 0,
- "total_tokens": 0,
- "cost": 0,
- "status": "string",
- "error_message": "string"
}
]Create a new budget.
+Max Budget (number) or Max Budget (null) (Max Budget) Maximum spending limit + | |
Budget Duration (string) or Budget Duration (null) (Budget Duration) Budget duration (e.g., 'monthly', 'daily') + | |
Budget Reset At (string) or Budget Reset At (null) (Budget Reset At) When budget resets + |
{- "max_budget": 0,
- "budget_duration": "string",
- "budget_reset_at": "2019-08-24T14:15:22Z"
}{- "budget_id": "string",
- "max_budget": 0,
- "budget_duration": "string",
- "budget_reset_at": "string",
- "created_at": "string",
- "updated_at": "string"
}List all budgets with pagination.
+| skip | integer (Skip) Default: 0 |
| limit | integer (Limit) Default: 100 |
[- {
- "budget_id": "string",
- "max_budget": 0,
- "budget_duration": "string",
- "budget_reset_at": "string",
- "created_at": "string",
- "updated_at": "string"
}
]Update a budget.
+| budget_id required | string (Budget Id) |
Max Budget (number) or Max Budget (null) (Max Budget) | |
Budget Duration (string) or Budget Duration (null) (Budget Duration) | |
Budget Reset At (string) or Budget Reset At (null) (Budget Reset At) |
{- "max_budget": 0,
- "budget_duration": "string",
- "budget_reset_at": "2019-08-24T14:15:22Z"
}{- "budget_id": "string",
- "max_budget": 0,
- "budget_duration": "string",
- "budget_reset_at": "string",
- "created_at": "string",
- "updated_at": "string"
}Set or update pricing for a model.
+| model_key required | string (Model Key) Model identifier in format 'provider:model' + |
| input_price_per_million required | number (Input Price Per Million) Price per 1M input tokens + |
| output_price_per_million required | number (Output Price Per Million) Price per 1M output tokens + |
{- "model_key": "string",
- "input_price_per_million": 0,
- "output_price_per_million": 0
}{- "model_key": "string",
- "input_price_per_million": 0,
- "output_price_per_million": 0,
- "created_at": "string",
- "updated_at": "string"
}List all model pricing.
+| skip | integer (Skip) Default: 0 |
| limit | integer (Limit) Default: 100 |
[- {
- "model_key": "string",
- "input_price_per_million": 0,
- "output_price_per_million": 0,
- "created_at": "string",
- "updated_at": "string"
}
]Readiness probe endpoint.
+Checks if the gateway is ready to serve requests by validating:
+Used by Kubernetes/container orchestrators for readiness probes. +Returns HTTP 503 if any dependency is unavailable.
+Returns: + dict: Status object with health details
+Raises: + HTTPException: 503 if service is not ready
+{ }