Skip to content

Commit c3c578f

Browse files
refactor: Source online model from environment variables
- Refactors the `/generate-and-run` endpoint in `main.py` to exclusively use the `ONLINE_MODEL` environment variable.
- Removes the `model` field from the API request body, making the environment the single source of truth for model configuration.
- The application will now raise an error if `ONLINE_MODEL` is not set in the environment when the online provider is selected.
1 parent 7c2e171 commit c3c578f

File tree

3 files changed

+58
-10
lines changed

3 files changed

+58
-10
lines changed

backend/.env.example

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
# The model provider to use. Can be "online" or "local".
22
MODEL_PROVIDER=online
33

4-
# The online model to use (e.g., "gemini-1.5-pro-latest").
4+
# The online model to use (e.g., "gemini-2.5-pro").
55
# This is only used if MODEL_PROVIDER is "online".
6-
ONLINE_MODEL=gemini-1.5-pro-latest
6+
ONLINE_MODEL=gemini-2.5-pro
77

8-
# The local model to use with Ollama (e.g., "llama3", "codellama").
8+
# The local model to use with Ollama (e.g., "qwen2.5-coder:14b").
99
# This is only used if MODEL_PROVIDER is "local".
10-
LOCAL_MODEL=llama3
10+
LOCAL_MODEL=qwen2.5-coder:14b
11+
12+
# --- Gemini Configuration ---
13+
# Your Google Gemini API key. This is required if MODEL_PROVIDER is "online".
14+
GEMINI_API_KEY=

backend/main.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
# --- Pydantic Models ---
2727
class Query(BaseModel):
2828
query: str
29-
model: str = "gemini-1.5-pro-latest"
3029

3130
# --- FastAPI App ---
3231
app = FastAPI()
@@ -169,11 +168,14 @@ async def generate_and_run_streaming(query: Query):
169168
raise HTTPException(status_code=400, detail="Query not provided")
170169

171170
model_provider = os.getenv("MODEL_PROVIDER", "online").lower()
171+
model_name = "" # initialize
172172
if model_provider == "local":
173-
model_name = os.getenv("LOCAL_MODEL", "llama3")
173+
model_name = os.getenv("LOCAL_MODEL", "qwen2.5-coder:14b")
174174
logging.info(f"Using local model provider: {model_name}")
175175
else:
176-
model_name = os.getenv("ONLINE_MODEL", query.model)
176+
model_name = os.getenv("ONLINE_MODEL")
177+
if not model_name:
178+
raise HTTPException(status_code=500, detail="ONLINE_MODEL environment variable is not set.")
177179
logging.info(f"Using online model provider: {model_name}")
178180

179181
return StreamingResponse(stream_generate_and_run(user_query, model_name), media_type="text/event-stream")

backend/robot_generator.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,53 @@
22
import json
33
import logging
44
import time
5+
import re
56
from typing import List, Optional
67
from pydantic import BaseModel, Field
78
import google.generativeai as genai
9+
from google.api_core.exceptions import ResourceExhausted
810
import ollama
911

12+
13+
# --- Gemini API Wrapper with Dynamic Retry Logic ---
14+
def call_gemini_with_retry(model, prompt: str, max_retries: int = 2):
15+
"""
16+
Calls the Gemini API with a dynamic retry mechanism based on the API's feedback.
17+
"""
18+
attempt = 0
19+
while attempt < max_retries:
20+
try:
21+
response = model.generate_content(prompt)
22+
return response
23+
except ResourceExhausted as e:
24+
attempt += 1
25+
error_message = str(e)
26+
27+
# Use regex to find the retry delay in the error message
28+
match = re.search(r"retry_delay {\s*seconds: (\d+)\s*}", error_message)
29+
30+
if match:
31+
wait_time = int(match.group(1)) + 1 # Add a 1-second buffer
32+
logging.warning(
33+
f"Gemini API quota exceeded. Retrying after {wait_time} seconds (attempt {attempt}/{max_retries})."
34+
)
35+
time.sleep(wait_time)
36+
else:
37+
# If no specific delay is found, wait a default time or re-raise
38+
logging.warning(
39+
f"Gemini API quota exceeded, but no retry_delay found. "
40+
f"Waiting 60 seconds before attempt {attempt}/{max_retries}."
41+
)
42+
time.sleep(60) # Fallback wait time
43+
44+
except Exception as e:
45+
logging.error(f"An unexpected error occurred calling Gemini API: {e}")
46+
raise e # Re-raise other exceptions immediately
47+
48+
logging.error(f"Gemini API call failed after {max_retries} attempts.")
49+
raise Exception("Gemini API call failed after multiple retries.")
50+
51+
1052
# --- Pydantic Models for Agent Communication ---
1153
# These models define the "contracts" for data passed between agents.
1254

@@ -92,7 +134,7 @@ def agent_step_planner(query: str, model_provider: str, model_name: str) -> List
92134
cleaned_response = response['message']['content']
93135
else: # Default to online
94136
model = genai.GenerativeModel(model_name)
95-
response = model.generate_content(prompt)
137+
response = call_gemini_with_retry(model, prompt)
96138
cleaned_response = response.text.strip().lstrip("```json").rstrip("```").strip()
97139

98140
planned_steps_data = json.loads(cleaned_response)
@@ -157,7 +199,7 @@ def agent_element_identifier(steps: List[PlannedStep], model_provider: str, mode
157199
cleaned_response = response['message']['content']
158200
else: # Default to online
159201
model = genai.GenerativeModel(model_name)
160-
response = model.generate_content(prompt)
202+
response = call_gemini_with_retry(model, prompt)
161203
cleaned_response = response.text.strip().lstrip("```json").rstrip("```").strip()
162204

163205
locator_data = json.loads(cleaned_response)
@@ -271,7 +313,7 @@ def agent_code_validator(code: str, model_provider: str, model_name: str) -> Val
271313
cleaned_response = response['message']['content']
272314
else: # Default to online
273315
model = genai.GenerativeModel(model_name)
274-
response = model.generate_content(prompt)
316+
response = call_gemini_with_retry(model, prompt)
275317
cleaned_response = response.text.strip().lstrip("```json").rstrip("```").strip()
276318

277319
validation_data = json.loads(cleaned_response)

0 commit comments

Comments (0)