diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml index a80de744..371a3e87 100644 --- a/.github/workflows/e2e_tests.yaml +++ b/.github/workflows/e2e_tests.yaml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: mode: ["server", "library"] - environment: ["ci", "azure", "vertexai"] + environment: ["ci", "azure", "vertexai", "watsonx"] name: "E2E: ${{ matrix.mode }} mode / ${{ matrix.environment }}" @@ -200,6 +200,8 @@ jobs: VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }} GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }} GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }} + WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} + WATSONX_API_KEY: ${{ secrets.WATSONX_API_KEY }} run: | # Debug: Check if environment variable is available for docker-compose echo "OPENAI_API_KEY is set: $([ -n "$OPENAI_API_KEY" ] && echo 'YES' || echo 'NO')" @@ -226,6 +228,8 @@ jobs: VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }} GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }} GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }} + WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} + WATSONX_API_KEY: ${{ secrets.WATSONX_API_KEY }} run: | echo "Starting service in library mode (1 container)" docker compose -f docker-compose-library.yaml up -d @@ -256,6 +260,13 @@ jobs: exit 1 } + # watsonx has a different convention than "/" + - name: Set watsonx test overrides + if: matrix.environment == 'watsonx' + run: | + echo "E2E_DEFAULT_MODEL_OVERRIDE=watsonx/watsonx/meta-llama/llama-3-3-70b-instruct" >> $GITHUB_ENV + echo "E2E_DEFAULT_PROVIDER_OVERRIDE=watsonx" >> $GITHUB_ENV + - name: Run e2e tests env: TERM: xterm-256color diff --git a/README.md b/README.md index c7a84a1b..27df9d80 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Lightspeed Core Stack is based on the FastAPI framework (Uvicorn). 
The service i | OpenAI | https://platform.openai.com | | Azure OpenAI | https://azure.microsoft.com/en-us/products/ai-services/openai-service | | Google VertexAI| https://cloud.google.com/vertex-ai | + | IBM WatsonX | https://www.ibm.com/products/watsonx | | RHOAI (vLLM) | See tests/e2e-prow/rhoai/configs/run.yaml | | RHEL AI (vLLM) | See tests/e2e/configs/run-rhelai.yaml | @@ -177,6 +178,7 @@ __Note__: Support for individual models is dependent on the specific inference p | Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) | | Azure | o1, o1-mini | No | remote::azure | | | VertexAI | google/gemini-2.0-flash, google/gemini-2.5-flash, google/gemini-2.5-pro [^1] | Yes | remote::vertexai | [1](examples/vertexai-run.yaml) | +| WatsonX | meta-llama/llama-3-3-70b-instruct | Yes | remote::watsonx | [1](examples/watsonx-run.yaml) | [^1]: List of models is limited by design in llama-stack, future versions will probably allow to use more models (see [here](https://github.com/llamastack/llama-stack/blob/release-0.3.x/llama_stack/providers/remote/inference/vertexai/vertexai.py#L54)) diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index 4733d5d6..8c164ab2 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -34,6 +34,10 @@ services: - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-} - VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-} - VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-} + # WatsonX + - WATSONX_BASE_URL=${WATSONX_BASE_URL:-} + - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-} + - WATSONX_API_KEY=${WATSONX_API_KEY:-} # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} healthcheck: diff --git a/docker-compose.yaml b/docker-compose.yaml index 3b00c381..292d6886 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -32,6 +32,10 @@ services: - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-} - VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-} - VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-} + # WatsonX + - WATSONX_BASE_URL=${WATSONX_BASE_URL:-} + - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-} + - WATSONX_API_KEY=${WATSONX_API_KEY:-} # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} networks: diff --git a/docs/providers.md b/docs/providers.md index 32f320dc..0f3891e1 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -55,7 +55,7 @@ The tables below summarize each provider category, containing the following atri | tgi | remote | `huggingface_hub`, `aiohttp` | ❌ | | together | remote | `together` | ❌ | | vertexai | remote | `google-auth` | ✅ | -| watsonx | remote | `ibm_watsonx_ai` | ❌ | +| watsonx | remote | `litellm` | ✅ | Red Hat providers: diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml new file mode 100644 index 00000000..3d3d6c0c --- /dev/null +++ b/examples/watsonx-run.yaml @@ -0,0 +1,161 @@ +version: 2 + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite +datasets: [] +image_name: starter +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} + type: sqlite +metadata_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} + 
type: sqlite + +providers: + inference: + - provider_id: watsonx + provider_type: remote::watsonx + config: + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:=key-not-set} + project_id: ${env.WATSONX_PROJECT_ID:=project-not-set} + timeout: 1200 + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: # Define the storage backend for RAG + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +telemetry: + enabled: true +server: + port: 8321 +storage: + backends: + kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. 
models, vector_stores) are saved together with the RAG chunks + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: custom-watsonx-model + provider_id: watsonx + model_type: llm + provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag # Register the RAG tool + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: # Define the default embedding model for RAG + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index 75050c73..6887baee 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -20,9 +20,9 @@ Toolgroup, ToolgroupAgentToolGroupWithArgs, ) +from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep from llama_stack_client.types.model_list_response import ModelListResponse from llama_stack_client.types.shared.interleaved_content_item import TextContentItem -from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep from sqlalchemy.exc import SQLAlchemyError import constants @@ -41,8 +41,8 @@ ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, - QueryResponse, PromptTooLongResponse, + QueryResponse, QuotaExceededResponse, ReferencedDocument, ServiceUnavailableResponse, @@ -540,7 +540,8 @@ def select_model_and_provider_id( logger.debug("Searching for model: %s, provider: %s", model_id, provider_id) # TODO: Create sepparate validation of provider if not any( - m.identifier == llama_stack_model_id and m.provider_id == provider_id + m.identifier in (llama_stack_model_id, model_id) + and m.provider_id == provider_id for m in models ): message = f"Model {model_id} from provider {provider_id} not found in available models" diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml new file mode 100644 index 00000000..3d3d6c0c --- /dev/null +++ b/tests/e2e/configs/run-watsonx.yaml @@ -0,0 +1,161 @@ +version: 2 + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite +datasets: [] +image_name: starter +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} + type: sqlite +metadata_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} + type: sqlite + +providers: + inference: + - provider_id: watsonx + provider_type: remote::watsonx + config: + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:=key-not-set} + project_id: ${env.WATSONX_PROJECT_ID:=project-not-set} + timeout: 1200 + - config: {} + provider_id: 
sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: # Define the storage backend for RAG + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +telemetry: + enabled: true +server: + port: 8321 +storage: + backends: + kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. 
models, vector_stores) are saved together with the RAG chunks + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: custom-watsonx-model + provider_id: watsonx + model_type: llm + provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag # Register the RAG tool + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: # Define the default embedding model for RAG + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/tests/e2e/features/conversations.feature b/tests/e2e/features/conversations.feature index 0fecb051..a3f04078 100644 --- a/tests/e2e/features/conversations.feature +++ b/tests/e2e/features/conversations.feature @@ -11,7 +11,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -28,7 +28,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -50,7 +50,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -97,7 +97,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -135,7 +135,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -149,7 +149,7 @@ Feature: 
conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -187,7 +187,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index 09b7feef..f2dbdb07 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -58,21 +58,32 @@ def before_all(context: Context) -> None: # Get first LLM model from running service print(f"Running tests in {context.deployment_mode} mode") - llm_model = _fetch_models_from_service() + # Check for environment variable overrides first + model_override = os.getenv("E2E_DEFAULT_MODEL_OVERRIDE") + provider_override = os.getenv("E2E_DEFAULT_PROVIDER_OVERRIDE") - if llm_model: - context.default_model = llm_model["model_id"] - context.default_provider = llm_model["provider_id"] + if model_override and provider_override: + context.default_model = model_override + context.default_provider = provider_override print( - f"Detected LLM: {context.default_model} (provider: {context.default_provider})" + f"Using override LLM: {context.default_model} (provider: {context.default_provider})" ) else: - # Fallback for development - context.default_model = "gpt-4o-mini" - context.default_provider = "openai" - print( - f"⚠ Could not detect models, using fallback: {context.default_provider}/{context.default_model}" - ) + llm_model = _fetch_models_from_service() + + if llm_model: + context.default_model = llm_model["model_id"] + context.default_provider = llm_model["provider_id"] + print( + f"Detected LLM: {context.default_model} (provider: {context.default_provider})" + ) + else: + # Fallback for development + context.default_model = "gpt-4o-mini" + context.default_provider = "openai" + print( + f"⚠ Could not detect models, using fallback: {context.default_provider}/{context.default_model}" + ) def before_scenario(context: Context, scenario: Scenario) -> None: diff --git a/tests/e2e/features/query.feature b/tests/e2e/features/query.feature index 5491afa5..a02312e6 100644 --- a/tests/e2e/features/query.feature +++ b/tests/e2e/features/query.feature @@ -10,7 +10,7 @@ Feature: Query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva When I use "query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 And The response should contain following fragments @@ -22,7 +22,7 @@ Feature: Query endpoint API tests And I set the 
Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva When I use "query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 And The response should contain following fragments @@ -34,7 +34,7 @@ Feature: Query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva When I use "query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 And I store conversation details @@ -51,7 +51,7 @@ Feature: Query endpoint API tests Given The system is in default state When I use "query" to ask question """ - {"query": "Write a simple code for reversing string"} + {"query": "Write a simple code for reversing string", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 401 And The body of the response is the following @@ -69,7 +69,7 @@ Feature: Query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva When I use "query" to ask question with authorization header """ - {"conversation_id": "123e4567-e89b-12d3-a456-426614174000", "query": "Write a simple code for reversing string"} + {"conversation_id": "123e4567-e89b-12d3-a456-426614174000", "query": "Write a simple code for reversing string", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 404 And The body of the response contains Conversation not found diff --git a/tests/e2e/features/streaming_query.feature b/tests/e2e/features/streaming_query.feature index ca69aa7a..3208388f 100644 --- a/tests/e2e/features/streaming_query.feature +++ b/tests/e2e/features/streaming_query.feature @@ -11,7 +11,7 @@ Feature: streaming_query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "streaming_query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow."} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 @@ -22,7 +22,7 @@ Feature: streaming_query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "streaming_query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything 
but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 @@ -35,7 +35,7 @@ Feature: streaming_query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "streaming_query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 @@ -48,7 +48,7 @@ Feature: streaming_query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "streaming_query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed And I use "streaming_query" to ask question with same conversation_id @@ -124,7 +124,7 @@ Feature: streaming_query endpoint API tests Given The system is in default state When I use "streaming_query" to ask question """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 401 And The body of the response is the following
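Note on the override plumbing introduced above: the `E2E_DEFAULT_MODEL_OVERRIDE` / `E2E_DEFAULT_PROVIDER_OVERRIDE` variables exported by the watsonx-only workflow step are consumed in `tests/e2e/features/environment.py`, which now prefers them over the model list fetched from the running service. The sketch below condenses that precedence logic from the `environment.py` hunk; `resolve_default_model` and its `fetch_models` parameter are illustrative names, and only `_fetch_models_from_service` plus the fallback values come from the diff itself.

```python
import os
from typing import Callable, Optional


def resolve_default_model(
    fetch_models: Callable[[], Optional[dict]],
) -> tuple[str, str]:
    """Pick (model_id, provider_id) for the e2e run.

    Precedence: explicit environment overrides, then the first LLM
    reported by the running service, then a development fallback.
    """
    model_override = os.getenv("E2E_DEFAULT_MODEL_OVERRIDE")
    provider_override = os.getenv("E2E_DEFAULT_PROVIDER_OVERRIDE")
    if model_override and provider_override:
        return model_override, provider_override

    llm_model = fetch_models()  # e.g. _fetch_models_from_service()
    if llm_model:
        return llm_model["model_id"], llm_model["provider_id"]

    # Fallback for development when no model can be detected
    return "gpt-4o-mini", "openai"
```

The override matters for watsonx because the advertised model identifier already carries the provider prefix (`watsonx/watsonx/meta-llama/llama-3-3-70b-instruct`, i.e. the provider ID plus the `watsonx/...` `provider_model_id`), which also appears to be why `select_model_and_provider_id` was relaxed to accept either the composed `<provider>/<model>` identifier or the model ID exactly as supplied.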