7 files changed: +64 -4 lines changed

@@ -124,6 +124,7 @@ run: check-env-run
 	--env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN) \
 	--env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL) \
 	--env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER) \
+	--env GEMINI_API_KEY=$(GEMINI_API_KEY) \
 	ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION)
 
 run-test:
@@ -162,6 +163,7 @@ run-local-db: check-env-run-local-db
 	--env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN) \
 	--env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL) \
 	--env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER) \
+	--env GEMINI_API_KEY=$(GEMINI_API_KEY) \
 	ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION)
 
 clean:

@@ -154,6 +154,20 @@ Runs basic tests against the local container.
 	kubectl apply -f my-chatbot-stack-deploy.yaml
 ```
 
+## Appendix - Google Gemini
+
+* Please set the environment variable `GEMINI_API_KEY=<YOUR_API_KEY>`
+* Example of a `v1/query` request:
+```json
+{
+  "query": "hello",
+  "system_prompt": "You are a helpful assistant.",
+  "model": "gemini/gemini-2.5-flash",
+  "provider": "gemini"
+}
+```
+
+
 ## Appendix - Host clean-up
 
 If you have the need for re-building images, apply the following clean-ups right before:
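
For reference, a minimal client-side sketch of the `v1/query` request documented in the new appendix, assuming the chatbot stack is already running and reachable at a local base URL (the host, port, and `CHATBOT_BASE_URL` variable below are assumptions, not part of this change). `GEMINI_API_KEY` only needs to be set on the server side:

```python
# Hypothetical client-side call of the v1/query example from the README appendix.
# The base URL and port are assumptions -- point CHATBOT_BASE_URL at your deployment.
import os

import requests

base_url = os.environ.get("CHATBOT_BASE_URL", "http://localhost:8080")

payload = {
    "query": "hello",
    "system_prompt": "You are a helpful assistant.",
    "model": "gemini/gemini-2.5-flash",
    "provider": "gemini",
}

response = requests.post(f"{base_url}/v1/query", json=payload, timeout=60)
response.raise_for_status()
print(response.json())
```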

@@ -18,6 +18,10 @@ providers:
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=fake}
   - provider_id: inline_sentence-transformer
     provider_type: inline::sentence-transformers
     config: {}
@@ -85,6 +89,11 @@ models:
   model_id: ${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model}
   provider_id: inline_sentence-transformer
   model_type: embedding
+- metadata: {}
+  model_id: ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash}
+  provider_id: gemini
+  provider_model_id: gemini/gemini-2.5-flash
+  model_type: llm
 shields: []
 vector_dbs:
 - metadata: {}

@@ -18,6 +18,10 @@ providers:
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=fake}
   - provider_id: inline_sentence-transformer
     provider_type: inline::sentence-transformers
     config: {}
@@ -85,6 +89,11 @@ models:
   model_id: ${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model}
   provider_id: inline_sentence-transformer
   model_type: embedding
+- metadata: {}
+  model_id: ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash}
+  provider_id: gemini
+  provider_model_id: gemini/gemini-2.5-flash
+  model_type: llm
 shields: []
 vector_dbs:
 - metadata: {}
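
Both run configurations above register the same Gemini model next to the existing vLLM and embedding entries. As a quick sanity check once the stack is up, the registered models can be listed through the llama-stack client; a minimal sketch, assuming the stack API is exposed on the default llama-stack port (adjust the base URL to your deployment):

```python
# Sketch: confirm the Gemini model registered by the run configs is visible.
# The base URL and port are assumptions -- adjust to wherever the stack is exposed.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

for model in client.models.list():
    # Expect a "gemini" provider entry with identifier gemini/gemini-2.5-flash,
    # alongside the vLLM inference model and the sentence-transformers embedding model.
    print(model.identifier, model.provider_id, model.model_type)
```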

@@ -15,6 +15,7 @@ dependencies = [
     "opentelemetry-exporter-otlp~=1.34.1",
     "sentence-transformers>=5.0.0",
     "sqlalchemy~=2.0.41",
+    "litellm~=1.75.3",
 ]
 
 [dependency-groups]

@@ -37,6 +37,7 @@ joblib==1.5.1
 jsonschema==4.24.0
 jsonschema-specifications==2025.4.1
 lightspeed-stack-providers==0.1.14
+litellm==1.75.5.post1
 llama-api-client==0.1.2
 llama-stack==0.2.16
 llama-stack-client==0.2.16
@@ -62,7 +63,7 @@ nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
 nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
 nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
 nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-openai==1.91.0
+openai==1.99.9
 opentelemetry-api==1.34.1
 opentelemetry-exporter-otlp==1.34.1
 opentelemetry-exporter-otlp-proto-common==1.34.1
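
The new `litellm` dependency pinned above presumably backs the `remote::gemini` provider. Independent of the stack, the API key and model id can be exercised directly through LiteLLM; a minimal sketch, assuming `GEMINI_API_KEY` is exported:

```python
# Standalone sketch: call Gemini through LiteLLM directly to verify the API key
# and the model id used by the run configs. Assumes GEMINI_API_KEY is set.
import litellm

response = litellm.completion(
    model="gemini/gemini-2.5-flash",
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)
```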