13 changes: 12 additions & 1 deletion .github/workflows/e2e_tests.yaml
@@ -10,7 +10,7 @@ jobs:
fail-fast: false
matrix:
mode: ["server", "library"]
environment: ["ci", "azure", "vertexai"]
environment: ["ci", "azure", "vertexai", "watsonx"]

name: "E2E: ${{ matrix.mode }} mode / ${{ matrix.environment }}"

@@ -200,6 +200,8 @@ jobs:
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }}
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
WATSONX_API_KEY: ${{ secrets.WATSONX_API_KEY }}
run: |
# Debug: Check if environment variable is available for docker-compose
echo "OPENAI_API_KEY is set: $([ -n "$OPENAI_API_KEY" ] && echo 'YES' || echo 'NO')"
@@ -226,6 +228,8 @@ jobs:
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }}
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
WATSONX_API_KEY: ${{ secrets.WATSONX_API_KEY }}
run: |
echo "Starting service in library mode (1 container)"
docker compose -f docker-compose-library.yaml up -d
@@ -256,6 +260,13 @@ jobs:
exit 1
}

# watsonx has a different convention than "<provider>/<model>"
- name: Set watsonx test overrides
if: matrix.environment == 'watsonx'
run: |
echo "E2E_DEFAULT_MODEL_OVERRIDE=watsonx/watsonx/meta-llama/llama-3-3-70b-instruct" >> $GITHUB_ENV
echo "E2E_DEFAULT_PROVIDER_OVERRIDE=watsonx" >> $GITHUB_ENV

- name: Run e2e tests
env:
TERM: xterm-256color
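The `E2E_DEFAULT_MODEL_OVERRIDE` value above reflects how llama-stack composes fully qualified model identifiers as `<provider>/<provider_model_id>`: the watsonx provider's own model IDs already carry a `watsonx/` prefix (see `provider_model_id` in `examples/watsonx-run.yaml` below), so the composed identifier ends up with the prefix twice. A minimal sketch of that assumed composition; `compose_identifier` is a hypothetical helper, not a function in this repository:

```python
# Hypothetical illustration of the assumed identifier composition;
# the helper name is not part of the repository.
def compose_identifier(provider_id: str, provider_model_id: str) -> str:
    """Compose a llama-stack model identifier as '<provider>/<provider_model_id>'."""
    return f"{provider_id}/{provider_model_id}"

# Typical provider: the provider-side model ID carries no prefix.
assert compose_identifier("azure", "gpt-4.1") == "azure/gpt-4.1"

# watsonx: the provider-side model ID already starts with 'watsonx/', so the
# composed identifier carries the prefix twice, matching the
# E2E_DEFAULT_MODEL_OVERRIDE value set above.
assert (
    compose_identifier("watsonx", "watsonx/meta-llama/llama-3-3-70b-instruct")
    == "watsonx/watsonx/meta-llama/llama-3-3-70b-instruct"
)
```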
2 changes: 2 additions & 0 deletions README.md
@@ -122,6 +122,7 @@ Lightspeed Core Stack is based on the FastAPI framework (Uvicorn). The service i
| OpenAI | https://platform.openai.com |
| Azure OpenAI | https://azure.microsoft.com/en-us/products/ai-services/openai-service |
| Google VertexAI| https://cloud.google.com/vertex-ai |
| IBM WatsonX | https://www.ibm.com/products/watsonx |
| RHOAI (vLLM) | See tests/e2e-prow/rhoai/configs/run.yaml |
| RHEL AI (vLLM) | See tests/e2e/configs/run-rhelai.yaml |

@@ -177,6 +178,7 @@ __Note__: Support for individual models is dependent on the specific inference p
| Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) |
| Azure | o1, o1-mini | No | remote::azure | |
| VertexAI | google/gemini-2.0-flash, google/gemini-2.5-flash, google/gemini-2.5-pro [^1] | Yes | remote::vertexai | [1](examples/vertexai-run.yaml) |
| WatsonX | meta-llama/llama-3-3-70b-instruct | Yes | remote::watsonx | [1](examples/watsonx-run.yaml) |

[^1]: The list of models is limited by design in llama-stack; future versions will probably allow more models to be used (see [here](https://github.com/llamastack/llama-stack/blob/release-0.3.x/llama_stack/providers/remote/inference/vertexai/vertexai.py#L54))

4 changes: 4 additions & 0 deletions docker-compose-library.yaml
@@ -34,6 +34,10 @@ services:
- GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
- VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-}
- VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-}
# WatsonX
- WATSONX_BASE_URL=${WATSONX_BASE_URL:-}
- WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-}
- WATSONX_API_KEY=${WATSONX_API_KEY:-}
# Enable debug logging if needed
- LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
healthcheck:
4 changes: 4 additions & 0 deletions docker-compose.yaml
@@ -32,6 +32,10 @@ services:
- GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
- VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-}
- VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-}
# WatsonX
- WATSONX_BASE_URL=${WATSONX_BASE_URL:-}
- WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-}
- WATSONX_API_KEY=${WATSONX_API_KEY:-}
# Enable debug logging if needed
- LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
networks:
2 changes: 1 addition & 1 deletion docs/providers.md
@@ -55,7 +55,7 @@ The tables below summarize each provider category, containing the following atri
| tgi | remote | `huggingface_hub`, `aiohttp` | ❌ |
| together | remote | `together` | ❌ |
| vertexai | remote | `google-auth` | ✅ |
| watsonx | remote | `ibm_watsonx_ai` | |
| watsonx | remote | `litellm` | |

Red Hat providers:

161 changes: 161 additions & 0 deletions examples/watsonx-run.yaml
@@ -0,0 +1,161 @@
version: 2

apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io

benchmarks: []
conversations_store:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db}
type: sqlite
datasets: []
image_name: starter
# external_providers_dir: /opt/app-root/src/.llama/providers.d
inference_store:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db}
type: sqlite
metadata_store:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db}
type: sqlite

providers:
inference:
- provider_id: watsonx
provider_type: remote::watsonx
config:
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=key-not-set}
project_id: ${env.WATSONX_PROJECT_ID:=project-not-set}
timeout: 1200
- config: {}
provider_id: sentence-transformers
provider_type: inline::sentence-transformers
files:
- config:
metadata_store:
table_name: files_metadata
backend: sql_default
storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files}
provider_id: meta-reference-files
provider_type: inline::localfs
safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
# - config:
# excluded_categories: []
# provider_id: llama-guard
# provider_type: inline::llama-guard
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: '********'
tool_runtime:
- config: {} # Enable the RAG tool
provider_id: rag-runtime
provider_type: inline::rag-runtime
vector_io:
- config: # Define the storage backend for RAG
persistence:
namespace: vector_io::faiss
backend: kv_default
provider_id: faiss
provider_type: inline::faiss
agents:
- config:
persistence:
agent_state:
namespace: agents_state
backend: kv_default
responses:
table_name: agents_responses
backend: sql_default
provider_id: meta-reference
provider_type: inline::meta-reference
batches:
- config:
kvstore:
namespace: batches_store
backend: kv_default
provider_id: reference
provider_type: inline::reference
datasetio:
- config:
kvstore:
namespace: huggingface_datasetio
backend: kv_default
provider_id: huggingface
provider_type: remote::huggingface
- config:
kvstore:
namespace: localfs_datasetio
backend: kv_default
provider_id: localfs
provider_type: inline::localfs
eval:
- config:
kvstore:
namespace: eval_store
backend: kv_default
provider_id: meta-reference
provider_type: inline::meta-reference
scoring_fns: []
telemetry:
enabled: true
server:
port: 8321
storage:
backends:
    kv_default: # Define the storage backend type for RAG; in this case the registry and RAG are unified, i.e. information on registered resources (e.g. models, vector_stores) is saved together with the RAG chunks
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db}
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db}
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- model_id: custom-watsonx-model
provider_id: watsonx
model_type: llm
provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct
shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::rag # Register the RAG tool
provider_id: rag-runtime
vector_stores:
default_provider_id: faiss
default_embedding_model: # Define the default embedding model for RAG
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
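The `${env.NAME:=default}` placeholders in this file are resolved from the environment when the configuration is loaded, falling back to the literal after `:=` when the variable is unset (so `WATSONX_BASE_URL` defaults to the us-south endpoint). A minimal sketch of that assumed substitution behaviour; this is an illustration, not llama-stack's actual resolver:

```python
import os
import re

# Hypothetical re-implementation of the assumed `${env.NAME:=default}` semantics;
# llama-stack's real resolver may differ in details.
_PLACEHOLDER = re.compile(r"\$\{env\.([A-Z0-9_]+):=([^}]*)\}")

def resolve(value: str) -> str:
    """Replace ${env.NAME:=default} with the environment value or the default."""
    return _PLACEHOLDER.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)

# With WATSONX_BASE_URL unset, the default endpoint from the config is used.
print(resolve("${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}"))
```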
7 changes: 4 additions & 3 deletions src/app/endpoints/query.py
@@ -20,9 +20,9 @@
Toolgroup,
ToolgroupAgentToolGroupWithArgs,
)
from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
from llama_stack_client.types.model_list_response import ModelListResponse
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
from sqlalchemy.exc import SQLAlchemyError

import constants
@@ -41,8 +41,8 @@
ForbiddenResponse,
InternalServerErrorResponse,
NotFoundResponse,
QueryResponse,
PromptTooLongResponse,
QueryResponse,
QuotaExceededResponse,
ReferencedDocument,
ServiceUnavailableResponse,
@@ -540,7 +540,8 @@ def select_model_and_provider_id(
logger.debug("Searching for model: %s, provider: %s", model_id, provider_id)
    # TODO: Create separate validation of provider
if not any(
m.identifier == llama_stack_model_id and m.provider_id == provider_id
m.identifier in (llama_stack_model_id, model_id)
and m.provider_id == provider_id
for m in models
):
message = f"Model {model_id} from provider {provider_id} not found in available models"
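The relaxed membership test accepts a registered identifier that matches either the composed `<provider>/<model>` form or the model ID exactly as the caller supplied it, which is what the watsonx override relies on because its identifier already begins with the provider prefix. A minimal sketch of the check in isolation, assuming `llama_stack_model_id` is built as `f"{provider_id}/{model_id}"` and that registered models expose `identifier` and `provider_id`; `RegisteredModel` and `is_known` are illustrative stand-ins, not code from this module:

```python
# Minimal sketch of the relaxed match; `RegisteredModel` stands in for the
# llama_stack_client model type, which exposes `identifier` and `provider_id`.
from dataclasses import dataclass

@dataclass
class RegisteredModel:
    identifier: str
    provider_id: str

models = [
    RegisteredModel("watsonx/watsonx/meta-llama/llama-3-3-70b-instruct", "watsonx"),
]

def is_known(model_id: str, provider_id: str) -> bool:
    # Assumed composition of the fully qualified identifier.
    llama_stack_model_id = f"{provider_id}/{model_id}"
    return any(
        m.identifier in (llama_stack_model_id, model_id)
        and m.provider_id == provider_id
        for m in models
    )

# Passing the already-qualified identifier now matches via the second form.
assert is_known("watsonx/watsonx/meta-llama/llama-3-3-70b-instruct", "watsonx")
```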