Skip to content

Commit c3c578f

Browse files
refactor: Source online model from environment variables
- Refactors the `/generate-and-run` endpoint in `main.py` to exclusively use the `ONLINE_MODEL` environment variable.
- Removes the `model` field from the API request body, making the environment the single source of truth for model configuration.
- The application will now raise an error if `ONLINE_MODEL` is not set in the environment when the online provider is selected.
1 parent 7c2e171 commit c3c578f

File tree

3 files changed

+58
-10
lines changed

3 files changed

+58
-10
lines changed

backend/.env.example

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
# The model provider to use. Can be "online" or "local".
22
MODEL_PROVIDER=online
33

4-
# The online model to use (e.g., "gemini-1.5-pro-latest").
4+
# The online model to use (e.g., "gemini-2.5-pro").
55
# This is only used if MODEL_PROVIDER is "online".
6-
ONLINE_MODEL=gemini-1.5-pro-latest
6+
ONLINE_MODEL=gemini-2.5-pro
77

8-
# The local model to use with Ollama (e.g., "llama3", "codellama").
8+
# The local model to use with Ollama (e.g., "qwen2.5-coder:14b").
99
# This is only used if MODEL_PROVIDER is "local".
10-
LOCAL_MODEL=llama3
10+
LOCAL_MODEL=qwen2.5-coder:14b
11+
12+
# --- Gemini Configuration ---
13+
# Your Google Gemini API key. This is required if MODEL_PROVIDER is "online".
14+
GEMINI_API_KEY=

backend/main.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
# --- Pydantic Models ---
2727
class Query(BaseModel):
2828
query: str
29-
model: str = "gemini-1.5-pro-latest"
3029

3130
# --- FastAPI App ---
3231
app = FastAPI()
@@ -169,11 +168,14 @@ async def generate_and_run_streaming(query: Query):
169168
raise HTTPException(status_code=400, detail="Query not provided")
170169

171170
model_provider = os.getenv("MODEL_PROVIDER", "online").lower()
171+
model_name = "" # initialize
172172
if model_provider == "local":
173-
model_name = os.getenv("LOCAL_MODEL", "llama3")
173+
model_name = os.getenv("LOCAL_MODEL", "qwen2.5-coder:14b")
174174
logging.info(f"Using local model provider: {model_name}")
175175
else:
176-
model_name = os.getenv("ONLINE_MODEL", query.model)
176+
model_name = os.getenv("ONLINE_MODEL")
177+
if not model_name:
178+
raise HTTPException(status_code=500, detail="ONLINE_MODEL environment variable is not set.")
177179
logging.info(f"Using online model provider: {model_name}")
178180

179181
return StreamingResponse(stream_generate_and_run(user_query, model_name), media_type="text/event-stream")

backend/robot_generator.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,53 @@
22
import json
33
import logging
44
import time
5+
import re
56
from typing import List, Optional
67
from pydantic import BaseModel, Field
78
import google.generativeai as genai
9+
from google.api_core.exceptions import ResourceExhausted
810
import ollama
911

12+
13+
# --- Gemini API Wrapper with Dynamic Retry Logic ---
14+
def call_gemini_with_retry(model, prompt: str, max_retries: int = 2):
15+
"""
16+
Calls the Gemini API with a dynamic retry mechanism based on the API's feedback.
17+
"""
18+
attempt = 0
19+
while attempt < max_retries:
20+
try:
21+
response = model.generate_content(prompt)
22+
return response
23+
except ResourceExhausted as e:
24+
attempt += 1
25+
error_message = str(e)
26+
27+
# Use regex to find the retry delay in the error message
28+
match = re.search(r"retry_delay {\s*seconds: (\d+)\s*}", error_message)
29+
30+
if match:
31+
wait_time = int(match.group(1)) + 1 # Add a 1-second buffer
32+
logging.warning(
33+
f"Gemini API quota exceeded. Retrying after {wait_time} seconds (attempt {attempt}/{max_retries})."
34+
)
35+
time.sleep(wait_time)
36+
else:
37+
# If no specific delay is found, wait a default time or re-raise
38+
logging.warning(
39+
f"Gemini API quota exceeded, but no retry_delay found. "
40+
f"Waiting 60 seconds before attempt {attempt}/{max_retries}."
41+
)
42+
time.sleep(60) # Fallback wait time
43+
44+
except Exception as e:
45+
logging.error(f"An unexpected error occurred calling Gemini API: {e}")
46+
raise e # Re-raise other exceptions immediately
47+
48+
logging.error(f"Gemini API call failed after {max_retries} attempts.")
49+
raise Exception("Gemini API call failed after multiple retries.")
50+
51+
1052
# --- Pydantic Models for Agent Communication ---
1153
# These models define the "contracts" for data passed between agents.
1254

@@ -92,7 +134,7 @@ def agent_step_planner(query: str, model_provider: str, model_name: str) -> List
92134
cleaned_response = response['message']['content']
93135
else: # Default to online
94136
model = genai.GenerativeModel(model_name)
95-
response = model.generate_content(prompt)
137+
response = call_gemini_with_retry(model, prompt)
96138
cleaned_response = response.text.strip().lstrip("```json").rstrip("```").strip()
97139

98140
planned_steps_data = json.loads(cleaned_response)
@@ -157,7 +199,7 @@ def agent_element_identifier(steps: List[PlannedStep], model_provider: str, mode
157199
cleaned_response = response['message']['content']
158200
else: # Default to online
159201
model = genai.GenerativeModel(model_name)
160-
response = model.generate_content(prompt)
202+
response = call_gemini_with_retry(model, prompt)
161203
cleaned_response = response.text.strip().lstrip("```json").rstrip("```").strip()
162204

163205
locator_data = json.loads(cleaned_response)
@@ -271,7 +313,7 @@ def agent_code_validator(code: str, model_provider: str, model_name: str) -> Val
271313
cleaned_response = response['message']['content']
272314
else: # Default to online
273315
model = genai.GenerativeModel(model_name)
274-
response = model.generate_content(prompt)
316+
response = call_gemini_with_retry(model, prompt)
275317
cleaned_response = response.text.strip().lstrip("```json").rstrip("```").strip()
276318

277319
validation_data = json.loads(cleaned_response)

0 commit comments

Comments (0)