1 change: 1 addition & 0 deletions .gitignore
@@ -9,3 +9,4 @@ service_account.json
# Since we are running it as a library, better not to commit the lock file
uv.lock
.secrets
openapi.json
20 changes: 10 additions & 10 deletions CONTRIBUTING.md
@@ -25,16 +25,16 @@ Ensure all tests pass: `pytest -v`

## Local Build for QA and manual testing

1. Use `litellm_docker_compose.yaml` to start LiteLLM and Postgres locally:
1. Use `anyllm_docker_compose.yaml` to start Any-LLM-Gateway and Postgres locally:

```bash
docker compose -f litellm_docker_compose.yaml up -d
docker compose -f anyllm_docker_compose.yaml up -d
```

or if you are using legacy docker-compose:

```bash
docker-compose -f litellm_docker_compose.yaml up -d
docker-compose -f anyllm_docker_compose.yaml up -d
```

2. Create the second database needed for App Attest authentication:
@@ -43,7 +43,7 @@ or if you are using legacy docker-compose:
bash scripts/create-app-attest-database.sh
```

LiteLLM will be accessible at `localhost:4000` and `localhost:4000/ui`.
Any-LLM-Gateway will be accessible at `localhost:8000` (the port mapped in `anyllm_docker_compose.yaml`).

3. Run MLPA with

@@ -54,7 +54,7 @@ LiteLLM will be accessible at `localhost:4000` and `localhost:4000/ui`.
4. Stop the service with

```bash
docker compose -f litellm_docker_compose.yaml down
docker compose -f anyllm_docker_compose.yaml down
```
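
To also drop the Postgres volume (and with it both the `gateway` and `app_attest` databases), a reset sketch; `-v` removes the `pg_data` volume declared in the compose file:

```bash
docker compose -f anyllm_docker_compose.yaml down -v
```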

### Useful curl commands for QA
@@ -79,7 +79,7 @@ curl --location 'http://0.0.0.0:8080/health/readiness' \
curl --location 'http://0.0.0.0:8080/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'x-fxa-authorization: Bearer {YOUR_MOZILLA_FXA_TOKEN}' \
--header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data '{
"model": "openai/gpt-4o",
"messages": [{
@@ -89,7 +89,7 @@ curl --location 'http://0.0.0.0:8080/health/readiness' \
}'
```

1. LiteLLM liveness:
1. Any-LLM-Gateway liveness:

```bash
curl --location 'http://localhost:8000/health/liveness' \
@@ -101,16 +101,16 @@ curl --location 'http://localhost:8000/health/liveness' \
```bash
curl --location 'http://localhost:8000/models' \
--header 'Content-Type: application/json' \
--header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data ''
```

1. Completion directly from LiteLLM:
1. Completion directly from Any-LLM-Gateway:

```bash
curl --location 'http://localhost:8000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'X-LiteLLM-Key: Bearer {MASTER_KEY}' \
--header 'X-AnyLLM-Key: Bearer {MASTER_KEY}' \
--data '{
"model": "openai/gpt-4o",
"messages": [
22 changes: 14 additions & 8 deletions README.md
@@ -1,6 +1,6 @@
# Mozilla LLM Proxy Auth (MLPA)

A proxy to verify App Attest/FxA payloads and proxy requests through LiteLLM to enact budgets and per user management.
A proxy to verify App Attest/FxA payloads and proxy requests through any-llm-gateway to enforce budgets and per-user management.

## Setup

@@ -12,9 +12,13 @@ This creates a virtual environment in `.venv/`, installs dependencies, and insta

## Running MLPA locally with Docker

### Run LiteLLM
### Run Any-LLM-Gateway

`docker compose -f litellm_docker_compose.yaml up -d`
The any-llm-gateway image requires authentication to pull; see the [GitHub docs](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry#authenticating-with-a-personal-access-token-classic) for help with creating a personal access token (PAT) and authenticating Docker to the registry.

```bash
echo "$GITHUB_PAT" | docker login ghcr.io -u USERNAME --password-stdin  # authenticate Docker with ghcr.io
docker compose -f anyllm_docker_compose.yaml up -d
```

### Run MLPA

@@ -30,14 +34,14 @@ pip install --no-cache-dir -e .
mlpa
```
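
Once it is running, a quick readiness probe (a sketch assuming the default `PORT=8080` from `.env`; same endpoint as the QA curls in CONTRIBUTING.md):

```bash
curl -s http://0.0.0.0:8080/health/readiness
```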

## Config (see [LiteLLM Documentation](https://docs.litellm.ai/docs/simple_proxy_old_doc) for more config options)
## Config

`.env` (see `config.py` for all configuration variables)

```
MASTER_KEY="sk-1234..."
LITELLM_API_BASE="http://mlpa:4000"
DATABASE_URL=postgresql://... # required for direct user editing in SQL
GATEWAY_API_BASE="http://any-llm-gateway:8000"
DATABASE_URL=postgresql://gateway:gateway@postgres:5432/gateway
CHALLENGE_EXPIRY_SECONDS=300
PORT=8080

@@ -47,12 +51,14 @@ APP_DEVELOPMENT_TEAM="12BC943KDC"
CLIENT_ID="..."
CLIENT_SECRET="..."

MODEL_NAME=""
MODEL_NAME="vertexai:model-name" # Use provider:model format
TEMPERATURE=0.1
TOP_P=0.01
```
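
A quick sanity check of these values before starting the service (a sketch; it assumes MLPA reads `.env` from the working directory, per `config.py`):

```bash
set -a; source .env; set +a   # export everything in .env into the current shell
echo "$GATEWAY_API_BASE"      # expected: http://any-llm-gateway:8000
mlpa
```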

### Also See `litellm_config.yaml` for litellm config
### Gateway Configuration

See `gateway_config.yaml` for any-llm-gateway configuration.

A service account configured for Vertex AI access is required: `service_account.json` should be in the repository root.

47 changes: 47 additions & 0 deletions anyllm_docker_compose.yaml
@@ -0,0 +1,47 @@
services:
postgres:
image: postgres:16-alpine
container_name: anyllm_postgres
restart: always
environment:
POSTGRES_USER: gateway
POSTGRES_PASSWORD: gateway
POSTGRES_DB: gateway
ports:
- "5432:5432"
volumes:
- pg_data:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready -U gateway -d gateway"]
interval: 5s
timeout: 5s
retries: 5
networks:
- gateway-network

any-llm-gateway:
image: ghcr.io/mozilla-ai/any-llm-gateway:main
container_name: any_llm_gateway
platform: linux/amd64
depends_on:
postgres:
condition: service_healthy
ports:
- "8000:8000"
volumes:
- ./gateway_config.yaml:/app/config.yaml
- ./service_account.json:/app/service_account.json
environment:
- DATABASE_URL=postgresql://gateway:gateway@postgres:5432/gateway
- GATEWAY_MASTER_KEY=${MASTER_KEY}
restart: unless-stopped
networks:
- gateway-network

volumes:
pg_data:

networks:
gateway-network:
name: mlpa-network
driver: bridge
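
The compose file interpolates `MASTER_KEY` from your shell into the container's `GATEWAY_MASTER_KEY`. A minimal bring-up sketch using only what the file above defines:

```bash
export MASTER_KEY="sk-1234..."                    # becomes GATEWAY_MASTER_KEY in the container
docker compose -f anyllm_docker_compose.yaml up -d
docker compose -f anyllm_docker_compose.yaml ps   # postgres should report healthy
```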
2,774 changes: 590 additions & 2,184 deletions docs/index.html

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions gateway_config.yaml
@@ -0,0 +1,14 @@
database_url: "${DATABASE_URL}"

host: "0.0.0.0"
port: 8000

# Master key for protecting key management endpoints (set via environment variable)
master_key: "${GATEWAY_MASTER_KEY}"

providers:
# Vertex AI configuration (for Google Cloud)
vertexai:
credentials: "/app/service_account.json" # Path to service account JSON file
project: "fx-gen-ai-sandbox" # GCP project ID
location: "us-south1" # GCP region
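
A smoke test that the gateway is serving on the configured host/port; this assumes it exposes the standard FastAPI `/openapi.json` route, the same path `scripts/create-docs.sh` fetches below:

```bash
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8000/openapi.json   # expect 200
```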
22 changes: 0 additions & 22 deletions litellm_config.yaml

This file was deleted.

42 changes: 0 additions & 42 deletions litellm_docker_compose.yaml

This file was deleted.

4 changes: 2 additions & 2 deletions mlpa_docker_compose.yaml
@@ -10,8 +10,8 @@ services:
environment:
- PG_DB_URL=${PG_DB_URL}
- MASTER_KEY=${MASTER_KEY}
- LITELLM_API_BASE=${LITELLM_API_BASE}
- LITELLM_DB_NAME=${LITELLM_DB_NAME}
- GATEWAY_API_BASE=${GATEWAY_API_BASE}
- GATEWAY_DB_NAME=${GATEWAY_DB_NAME}
- CHALLENGE_EXPIRY_SECONDS=${CHALLENGE_EXPIRY_SECONDS}
- PORT=${PORT}
- APP_BUNDLE_ID=${APP_BUNDLE_ID}
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "mlpa"
version = "0.1.0"
description = "A proxy to verify App Attest/FxA payloads and proxy requests using LiteLLM virtual keys."
description = "A proxy to verify App Attest/FxA payloads and proxy requests using Any-LLM-Gateway virtual keys."
authors = [{ name = "Noah Podgurski", email = "npodgurski@mozilla.com" }]
readme = "README.md"
requires-python = ">=3.12"
2 changes: 1 addition & 1 deletion scripts/create-app-attest-database.sh
@@ -1 +1 @@
docker exec -it litellm_postgres psql -U litellm -c "CREATE DATABASE app_attest;"
docker exec -it anyllm_postgres psql -U gateway -c "CREATE DATABASE app_attest;"
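
To confirm the database was created, a follow-up check against the same container and role:

```bash
docker exec -it anyllm_postgres psql -U gateway -c "\l"   # app_attest should be listed alongside gateway
```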
19 changes: 19 additions & 0 deletions scripts/create-docs.sh
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
set -e
# set dir to the root of the project
cd "$(dirname "$0")/.."

OPENAPI_URL="http://localhost:8000/openapi.json"
OUTPUT="docs/index.html"
API_JSON="openapi.json"

# Fetch the OpenAPI JSON from the running service
curl -sSL "$OPENAPI_URL" -o "$API_JSON"

# Bundle it into a standalone HTML page with Redocly CLI
# (no global install needed: npx fetches @redocly/cli on demand; npm must be installed)
npx -y @redocly/cli@latest build-docs "$API_JSON" -o "$OUTPUT"

rm "$API_JSON"

echo "Generated $OUTPUT from $OPENAPI_URL"
16 changes: 8 additions & 8 deletions src/proxy/core/completions.py
@@ -5,13 +5,13 @@
from fastapi import HTTPException

from .classes import AuthorizedChatRequest
from .config import LITELLM_COMPLETIONS_URL, LITELLM_HEADERS
from .config import GATEWAY_COMPLETIONS_URL, GATEWAY_HEADERS
from .prometheus_metrics import PrometheusResult, metrics


async def stream_completion(authorized_chat_request: AuthorizedChatRequest):
"""
Proxies a streaming request to LiteLLM.
Proxies a streaming request to any-llm-gateway.
Yields response chunks as they are received and logs metrics.
"""
start_time = time.time()
@@ -31,8 +31,8 @@ async def stream_completion(authorized_chat_request: AuthorizedChatRequest):
async with httpx.AsyncClient() as client:
async with client.stream(
"POST",
LITELLM_COMPLETIONS_URL,
headers=LITELLM_HEADERS,
GATEWAY_COMPLETIONS_URL,
headers=GATEWAY_HEADERS,
json=body,
timeout=30,
) as response:
@@ -66,7 +66,7 @@ async def stream_completion(authorized_chat_request: AuthorizedChatRequest):
)
return
except Exception as e:
print(f"Failed to proxy request to {LITELLM_COMPLETIONS_URL}: {e}")
print(f"Failed to proxy request to {GATEWAY_COMPLETIONS_URL}: {e}")
return
finally:
metrics.chat_completion_latency.labels(result=result).observe(
@@ -76,7 +76,7 @@ async def stream_completion(authorized_chat_request: AuthorizedChatRequest):

async def get_completion(authorized_chat_request: AuthorizedChatRequest):
"""
Proxies a non-streaming request to LiteLLM.
Proxies a non-streaming request to any-llm-gateway.
"""
start_time = time.time()
body = {
@@ -92,7 +92,7 @@ async def get_completion(authorized_chat_request: AuthorizedChatRequest):
try:
async with httpx.AsyncClient() as client:
response = await client.post(
LITELLM_COMPLETIONS_URL, headers=LITELLM_HEADERS, json=body, timeout=10
GATEWAY_COMPLETIONS_URL, headers=GATEWAY_HEADERS, json=body, timeout=10
)
response.raise_for_status()
data = response.json()
@@ -109,7 +109,7 @@ async def get_completion(authorized_chat_request: AuthorizedChatRequest):
raise HTTPException(
status_code=500,
detail={
"error": f"Failed to proxy request to {LITELLM_COMPLETIONS_URL}: {e}"
"error": f"Failed to proxy request to {GATEWAY_COMPLETIONS_URL}: {e}"
},
)
finally:
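
For manual QA, the upstream call both functions make can be reproduced by hand. A hedged sketch: the endpoint and `X-AnyLLM-Key` header come from the CONTRIBUTING.md examples above, and the `stream` flag is assumed to mirror what `stream_completion` puts in its request body:

```bash
curl -N 'http://localhost:8000/v1/chat/completions' \
  --header 'Content-Type: application/json' \
  --header "X-AnyLLM-Key: Bearer $MASTER_KEY" \
  --data '{
    "model": "vertexai:model-name",
    "stream": true,
    "messages": [{"role": "user", "content": "Hello"}]
  }'
```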