diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml index a80de744..371a3e87 100644 --- a/.github/workflows/e2e_tests.yaml +++ b/.github/workflows/e2e_tests.yaml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: mode: ["server", "library"] - environment: ["ci", "azure", "vertexai"] + environment: ["ci", "azure", "vertexai", "watsonx"] name: "E2E: ${{ matrix.mode }} mode / ${{ matrix.environment }}" @@ -200,6 +200,8 @@ jobs: VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }} GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }} GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }} + WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} + WATSONX_API_KEY: ${{ secrets.WATSONX_API_KEY }} run: | # Debug: Check if environment variable is available for docker-compose echo "OPENAI_API_KEY is set: $([ -n "$OPENAI_API_KEY" ] && echo 'YES' || echo 'NO')" @@ -226,6 +228,8 @@ jobs: VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }} GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }} GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }} + WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} + WATSONX_API_KEY: ${{ secrets.WATSONX_API_KEY }} run: | echo "Starting service in library mode (1 container)" docker compose -f docker-compose-library.yaml up -d @@ -256,6 +260,13 @@ jobs: exit 1 } + # watsonx has a different convention than "/" + - name: Set watsonx test overrides + if: matrix.environment == 'watsonx' + run: | + echo "E2E_DEFAULT_MODEL_OVERRIDE=watsonx/watsonx/meta-llama/llama-3-3-70b-instruct" >> $GITHUB_ENV + echo "E2E_DEFAULT_PROVIDER_OVERRIDE=watsonx" >> $GITHUB_ENV + - name: Run e2e tests env: TERM: xterm-256color diff --git a/README.md b/README.md index c7a84a1b..27df9d80 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Lightspeed Core Stack is based on the FastAPI framework (Uvicorn). 
The service i | OpenAI | https://platform.openai.com | | Azure OpenAI | https://azure.microsoft.com/en-us/products/ai-services/openai-service | | Google VertexAI| https://cloud.google.com/vertex-ai | + | IBM WatsonX | https://www.ibm.com/products/watsonx | | RHOAI (vLLM) | See tests/e2e-prow/rhoai/configs/run.yaml | | RHEL AI (vLLM) | See tests/e2e/configs/run-rhelai.yaml | @@ -177,6 +178,7 @@ __Note__: Support for individual models is dependent on the specific inference p | Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) | | Azure | o1, o1-mini | No | remote::azure | | | VertexAI | google/gemini-2.0-flash, google/gemini-2.5-flash, google/gemini-2.5-pro [^1] | Yes | remote::vertexai | [1](examples/vertexai-run.yaml) | +| WatsonX | meta-llama/llama-3-3-70b-instruct | Yes | remote::watsonx | [1](examples/watsonx-run.yaml) | [^1]: List of models is limited by design in llama-stack, future versions will probably allow to use more models (see [here](https://github.com/llamastack/llama-stack/blob/release-0.3.x/llama_stack/providers/remote/inference/vertexai/vertexai.py#L54)) diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index 4733d5d6..8c164ab2 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -34,6 +34,10 @@ services: - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-} - VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-} - VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-} + # WatsonX + - WATSONX_BASE_URL=${WATSONX_BASE_URL:-} + - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-} + - WATSONX_API_KEY=${WATSONX_API_KEY:-} # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} healthcheck: diff --git a/docker-compose.yaml b/docker-compose.yaml index 3b00c381..292d6886 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -32,6 +32,10 @@ services: - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-} - VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-} - VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-} + # WatsonX + - WATSONX_BASE_URL=${WATSONX_BASE_URL:-} + - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-} + - WATSONX_API_KEY=${WATSONX_API_KEY:-} # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} networks: diff --git a/docs/providers.md b/docs/providers.md index 32f320dc..0f3891e1 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -55,7 +55,7 @@ The tables below summarize each provider category, containing the following atri | tgi | remote | `huggingface_hub`, `aiohttp` | ❌ | | together | remote | `together` | ❌ | | vertexai | remote | `google-auth` | ✅ | -| watsonx | remote | `ibm_watsonx_ai` | ❌ | +| watsonx | remote | `litellm` | ✅ | Red Hat providers: diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml new file mode 100644 index 00000000..3d3d6c0c --- /dev/null +++ b/examples/watsonx-run.yaml @@ -0,0 +1,161 @@ +version: 2 + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite +datasets: [] +image_name: starter +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} + type: sqlite +metadata_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} + 
type: sqlite + +providers: + inference: + - provider_id: watsonx + provider_type: remote::watsonx + config: + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:=key-not-set} + project_id: ${env.WATSONX_PROJECT_ID:=project-not-set} + timeout: 1200 + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: # Define the storage backend for RAG + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +telemetry: + enabled: true +server: + port: 8321 +storage: + backends: + kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. 
models, vector_stores) are saved together with the RAG chunks + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: custom-watsonx-model + provider_id: watsonx + model_type: llm + provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag # Register the RAG tool + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: # Define the default embedding model for RAG + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index 75050c73..6887baee 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -20,9 +20,9 @@ Toolgroup, ToolgroupAgentToolGroupWithArgs, ) +from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep from llama_stack_client.types.model_list_response import ModelListResponse from llama_stack_client.types.shared.interleaved_content_item import TextContentItem -from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep from sqlalchemy.exc import SQLAlchemyError import constants @@ -41,8 +41,8 @@ ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, - QueryResponse, PromptTooLongResponse, + QueryResponse, QuotaExceededResponse, ReferencedDocument, ServiceUnavailableResponse, @@ -540,7 +540,8 @@ def select_model_and_provider_id( logger.debug("Searching for model: %s, provider: %s", model_id, provider_id) # TODO: Create sepparate validation of provider if not any( - m.identifier == llama_stack_model_id and m.provider_id == provider_id + m.identifier in (llama_stack_model_id, model_id) + and m.provider_id == provider_id for m in models ): message = f"Model {model_id} from provider {provider_id} not found in available models" diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml new file mode 100644 index 00000000..3d3d6c0c --- /dev/null +++ b/tests/e2e/configs/run-watsonx.yaml @@ -0,0 +1,161 @@ +version: 2 + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite +datasets: [] +image_name: starter +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} + type: sqlite +metadata_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} + type: sqlite + +providers: + inference: + - provider_id: watsonx + provider_type: remote::watsonx + config: + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:=key-not-set} + project_id: ${env.WATSONX_PROJECT_ID:=project-not-set} + timeout: 1200 + - config: {} + provider_id: 
sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: # Define the storage backend for RAG + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +telemetry: + enabled: true +server: + port: 8321 +storage: + backends: + kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. 
models, vector_stores) are saved together with the RAG chunks + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: custom-watsonx-model + provider_id: watsonx + model_type: llm + provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag # Register the RAG tool + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: # Define the default embedding model for RAG + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/tests/e2e/features/conversations.feature b/tests/e2e/features/conversations.feature index 0fecb051..a3f04078 100644 --- a/tests/e2e/features/conversations.feature +++ b/tests/e2e/features/conversations.feature @@ -11,7 +11,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -28,7 +28,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -50,7 +50,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -97,7 +97,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -135,7 +135,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -149,7 +149,7 @@ Feature: 
conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details @@ -187,7 +187,7 @@ Feature: conversations endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "query" to ask question with authorization header """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ And The status code of the response is 200 And I store conversation details diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index 09b7feef..f2dbdb07 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -58,21 +58,32 @@ def before_all(context: Context) -> None: # Get first LLM model from running service print(f"Running tests in {context.deployment_mode} mode") - llm_model = _fetch_models_from_service() + # Check for environment variable overrides first + model_override = os.getenv("E2E_DEFAULT_MODEL_OVERRIDE") + provider_override = os.getenv("E2E_DEFAULT_PROVIDER_OVERRIDE") - if llm_model: - context.default_model = llm_model["model_id"] - context.default_provider = llm_model["provider_id"] + if model_override and provider_override: + context.default_model = model_override + context.default_provider = provider_override print( - f"Detected LLM: {context.default_model} (provider: {context.default_provider})" + f"Using override LLM: {context.default_model} (provider: {context.default_provider})" ) else: - # Fallback for development - context.default_model = "gpt-4o-mini" - context.default_provider = "openai" - print( - f"⚠ Could not detect models, using fallback: {context.default_provider}/{context.default_model}" - ) + llm_model = _fetch_models_from_service() + + if llm_model: + context.default_model = llm_model["model_id"] + context.default_provider = llm_model["provider_id"] + print( + f"Detected LLM: {context.default_model} (provider: {context.default_provider})" + ) + else: + # Fallback for development + context.default_model = "gpt-4o-mini" + context.default_provider = "openai" + print( + f"⚠ Could not detect models, using fallback: {context.default_provider}/{context.default_model}" + ) def before_scenario(context: Context, scenario: Scenario) -> None: diff --git a/tests/e2e/features/query.feature b/tests/e2e/features/query.feature index 5491afa5..a02312e6 100644 --- a/tests/e2e/features/query.feature +++ b/tests/e2e/features/query.feature @@ -10,7 +10,7 @@ Feature: Query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva When I use "query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 And The response should contain following fragments @@ -22,7 +22,7 @@ Feature: Query endpoint API tests And I set the 
Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva When I use "query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 And The response should contain following fragments @@ -34,7 +34,7 @@ Feature: Query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva When I use "query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 And I store conversation details @@ -51,7 +51,7 @@ Feature: Query endpoint API tests Given The system is in default state When I use "query" to ask question """ - {"query": "Write a simple code for reversing string"} + {"query": "Write a simple code for reversing string", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 401 And The body of the response is the following @@ -69,7 +69,7 @@ Feature: Query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva When I use "query" to ask question with authorization header """ - {"conversation_id": "123e4567-e89b-12d3-a456-426614174000", "query": "Write a simple code for reversing string"} + {"conversation_id": "123e4567-e89b-12d3-a456-426614174000", "query": "Write a simple code for reversing string", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 404 And The body of the response contains Conversation not found diff --git a/tests/e2e/features/streaming_query.feature b/tests/e2e/features/streaming_query.feature index ca69aa7a..3208388f 100644 --- a/tests/e2e/features/streaming_query.feature +++ b/tests/e2e/features/streaming_query.feature @@ -11,7 +11,7 @@ Feature: streaming_query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "streaming_query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow."} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 @@ -22,7 +22,7 @@ Feature: streaming_query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "streaming_query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything 
but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 @@ -35,7 +35,7 @@ Feature: streaming_query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "streaming_query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 @@ -48,7 +48,7 @@ Feature: streaming_query endpoint API tests And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva And I use "streaming_query" to ask question with authorization header """ - {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything"} + {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed And I use "streaming_query" to ask question with same conversation_id @@ -124,7 +124,7 @@ Feature: streaming_query endpoint API tests Given The system is in default state When I use "streaming_query" to ask question """ - {"query": "Say hello"} + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 401 And The body of the response is the following
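Note on the override plumbing introduced above: the `E2E_DEFAULT_MODEL_OVERRIDE` / `E2E_DEFAULT_PROVIDER_OVERRIDE` variables exported by the watsonx-only workflow step are consumed in `tests/e2e/features/environment.py`, which now prefers them over the model list fetched from the running service. The sketch below condenses that precedence logic from the `environment.py` hunk; `resolve_default_model` and its `fetch_models` parameter are illustrative names, and only `_fetch_models_from_service` plus the fallback values come from the diff itself.

```python
import os
from typing import Callable, Optional


def resolve_default_model(
    fetch_models: Callable[[], Optional[dict]],
) -> tuple[str, str]:
    """Pick (model_id, provider_id) for the e2e run.

    Precedence: explicit environment overrides, then the first LLM
    reported by the running service, then a development fallback.
    """
    model_override = os.getenv("E2E_DEFAULT_MODEL_OVERRIDE")
    provider_override = os.getenv("E2E_DEFAULT_PROVIDER_OVERRIDE")
    if model_override and provider_override:
        return model_override, provider_override

    llm_model = fetch_models()  # e.g. _fetch_models_from_service()
    if llm_model:
        return llm_model["model_id"], llm_model["provider_id"]

    # Fallback for development when no model can be detected
    return "gpt-4o-mini", "openai"
```

The override matters for watsonx because the advertised model identifier already carries the provider prefix (`watsonx/watsonx/meta-llama/llama-3-3-70b-instruct`, i.e. the provider ID plus the `watsonx/...` `provider_model_id`), which also appears to be why `select_model_and_provider_id` was relaxed to accept either the composed `<provider>/<model>` identifier or the model ID exactly as supplied.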