Skip to content

Commit ea2ee6a

Browse files
authored
Update Tool Server (#37)
* Enhance search endpoint with error handling and usage rollback - Updated the search function to include an authenticator dependency for managing API key usage. - Implemented error handling to roll back usage reservations in case of request failures. - Added a new method in APIKeyAuthenticator to decrement usage counters, ensuring accurate tracking of API key usage. * Add API key usage retry logic and configuration - Introduced new environment variables for API key usage retries and delays. - Implemented a retry mechanism with exponential backoff for Firestore transactions in the API key repository. - Added a method to decrement the usage counter, ensuring accurate tracking during request failures. * Add daily usage tracking for Gemini models - Introduced DailyUsageRepository to manage daily usage counters. - Added environment variables for daily usage configuration in .env.example and README. - Updated app.py to integrate daily usage tracking with API key consumption. - Modified Dockerfile to include daily_usage.py. - Enhanced tests to validate usage consumption and limits. * Updated search function to eliminate the reservation variable, streamlining the code. * Remove unnecessary 'None' check
1 parent a4bb522 commit ea2ee6a

File tree

9 files changed

+604
-44
lines changed

9 files changed

+604
-44
lines changed

src/utils/web_search/.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
!app.py
44
!db.py
55
!auth.py
6+
!daily_usage.py
67
!requirements-app.txt
78
!requirements_app.in
89
!Dockerfile

src/utils/web_search/.env.example

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,23 @@ FIRESTORE_PROJECT_ID=***
33
FIRESTORE_DATABASE_NAME=***
44
FIRESTORE_COLLECTION=apiKeys
55

6-
PBKDF2_ITERATIONS=20000
7-
PBKDF2_SALT_BYTES=16
6+
GEMINI_MAX_ATTEMPTS=1
7+
GEMINI_MAX_BACKOFF_SECONDS=2
8+
9+
API_KEY_PBKDF2_ITERATIONS=20000
10+
API_KEY_PBKDF2_SALT_BYTES=16
811

912
API_KEY_CACHE_TTL=30
1013
API_KEY_CACHE_MAX_ITEMS=1024
14+
15+
API_KEY_USAGE_MAX_RETRIES=8
16+
API_KEY_USAGE_BASE_DELAY=0.05
17+
API_KEY_USAGE_MAX_DELAY=1.0
18+
19+
DAILY_USAGE_COLLECTION=dailyUsageCounters
20+
DAILY_USAGE_MAX_RETRIES=8
21+
DAILY_USAGE_BASE_DELAY=0.05
22+
DAILY_USAGE_MAX_DELAY=1.0
23+
24+
GEMINI_GROUNDING_FREE_LIMIT_PRO=1500
25+
GEMINI_GROUNDING_FREE_LIMIT_FLASH=1500

src/utils/web_search/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ RUN pip install --no-cache-dir -r requirements-app.txt
77

88
RUN mkdir -p /app/src/utils/web_search
99
RUN touch /app/src/utils/__init__.py
10-
COPY __init__.py app.py auth.py db.py /app/src/utils/web_search/
10+
COPY __init__.py app.py auth.py db.py daily_usage.py /app/src/utils/web_search/
1111

1212
ENV PYTHONPATH=/app/src
1313
CMD ["uvicorn", "utils.web_search.app:app", "--host", "0.0.0.0", "--port", "8080"]

src/utils/web_search/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ gcloud auth configure-docker "$REGION-docker.pkg.dev"
4949
| `GEMINI_API_KEY` | Gemini API key used by the proxy | _(required)_ |
5050
| `GEMINI_MAX_ATTEMPTS`, `GEMINI_MAX_BACKOFF_SECONDS` | Retry tuning | `5`, `10` |
5151
| `API_KEY_CACHE_TTL`, `API_KEY_CACHE_MAX_ITEMS` | Auth cache tuning | `30`, `1024` |
52+
| `DAILY_USAGE_COLLECTION` | Collection that stores per-day usage counters | `dailyUsageCounters` |
53+
| `DAILY_USAGE_MAX_RETRIES`, `DAILY_USAGE_BASE_DELAY`, `DAILY_USAGE_MAX_DELAY` | Daily usage retry tuning | `8`, `0.05`, `1.0` |
54+
| `GEMINI_GROUNDING_FREE_LIMIT_PRO` | Daily free allowance for `gemini-2.5-pro` | `1500` |
55+
| `GEMINI_GROUNDING_FREE_LIMIT_FLASH` | Shared daily free allowance for Flash/Flash-Lite | `1500` |
5256

5357
Keep `.env.example` up to date so teammates can copy it into their own `.env`.
5458

@@ -81,6 +85,8 @@ Keep `.env.example` up to date so teammates can copy it into their own `.env`.
8185
export FIRESTORE_DATABASE_NAME=grounding
8286
export FIRESTORE_EMULATOR_HOST=0.0.0.0:8922
8387
export GEMINI_API_KEY="dev-placeholder"
88+
export GEMINI_GROUNDING_FREE_LIMIT_PRO=1500
89+
export GEMINI_GROUNDING_FREE_LIMIT_FLASH=1500
8490
```
8591

8692
4. **Install Python dependencies**

src/utils/web_search/app.py

Lines changed: 151 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
InactiveAPIKeyError,
2222
InvalidAPIKeyError,
2323
)
24+
from .daily_usage import DailyUsageRepository
2425
from .db import APIKeyRecord, APIKeyRepository, UsageLimitExceededError
2526

2627

@@ -37,6 +38,58 @@
3738
FIRESTORE_COLLECTION = os.getenv("FIRESTORE_COLLECTION", "apiKeys")
3839
API_KEY_CACHE_TTL = int(os.getenv("API_KEY_CACHE_TTL", "30"))
3940
API_KEY_CACHE_MAX_ITEMS = int(os.getenv("API_KEY_CACHE_MAX_ITEMS", "1024"))
41+
FREE_LIMIT_DEFAULT_PRO = 1500
42+
FREE_LIMIT_DEFAULT_FLASH = 1500
43+
44+
45+
def _parse_free_limit(env_var: str, default: int) -> int:
46+
"""Parse a non-negative integer from the environment with logging."""
47+
value = os.getenv(env_var)
48+
if value is None or value == "":
49+
return default
50+
try:
51+
parsed = int(value)
52+
except ValueError:
53+
logger.warning(
54+
"Invalid value '%s' for %s; falling back to %d",
55+
value,
56+
env_var,
57+
default,
58+
)
59+
return default
60+
if parsed < 0:
61+
logger.warning(
62+
"Negative value '%s' for %s; treating as 0",
63+
value,
64+
env_var,
65+
)
66+
return 0
67+
return parsed
68+
69+
70+
MODEL_TO_USAGE_BUCKET: dict[str, str] = {
71+
"gemini-2.5-pro": "gemini-2.5-pro",
72+
"gemini-2.5-flash": "gemini-2.5-flash-family",
73+
"gemini-2.5-flash-lite": "gemini-2.5-flash-family",
74+
}
75+
76+
BUCKET_FREE_LIMITS: dict[str, int] = {
77+
"gemini-2.5-pro": _parse_free_limit(
78+
"GEMINI_GROUNDING_FREE_LIMIT_PRO",
79+
FREE_LIMIT_DEFAULT_PRO,
80+
),
81+
"gemini-2.5-flash-family": _parse_free_limit(
82+
"GEMINI_GROUNDING_FREE_LIMIT_FLASH",
83+
FREE_LIMIT_DEFAULT_FLASH,
84+
),
85+
}
86+
87+
88+
def _resolve_usage_bucket(model: str) -> tuple[str, int]:
    """Map a model name to its usage bucket and that bucket's free allowance.

    Models missing from ``MODEL_TO_USAGE_BUCKET`` are used as their own
    bucket name, and buckets missing from ``BUCKET_FREE_LIMITS`` get a free
    allowance of ``0``.

    Parameters
    ----------
    model : str
        Model identifier taken from the request.

    Returns
    -------
    tuple of (str, int)
        The bucket name and its configured free allowance.
    """
    try:
        bucket = MODEL_TO_USAGE_BUCKET[model]
    except KeyError:
        # Unmapped models are tracked under their own name.
        bucket = model
    free_limit = BUCKET_FREE_LIMITS.get(bucket, 0)
    return bucket, free_limit
92+
4093

4194
RETRYABLE_EXCEPTIONS: tuple[type[Exception], ...] = (
4295
google_exceptions.ResourceExhausted,
@@ -189,6 +242,13 @@ async def startup_event() -> None:
189242
cache_ttl_seconds=API_KEY_CACHE_TTL,
190243
cache_max_items=API_KEY_CACHE_MAX_ITEMS,
191244
)
245+
app.state.daily_usage_repository = DailyUsageRepository(
246+
firestore_client,
247+
collection_name=os.getenv(
248+
"DAILY_USAGE_COLLECTION",
249+
"dailyUsageCounters",
250+
),
251+
)
192252

193253

194254
async def shutdown_event() -> None:
@@ -235,6 +295,18 @@ def get_authenticator() -> APIKeyAuthenticator:
235295
return authenticator
236296

237297

298+
def get_daily_usage_repository() -> DailyUsageRepository:
    """Fetch the daily usage repository from ``app.state``.

    Returns
    -------
    DailyUsageRepository
        The object stored under ``app.state.daily_usage_repository``.

    Raises
    ------
    RuntimeError
        If the attribute is missing or ``None``.
    """
    repository: DailyUsageRepository | None = getattr(
        app.state, "daily_usage_repository", None
    )
    if repository is not None:
        return repository
    raise RuntimeError("Daily usage repository has not been initialised")
308+
309+
238310
async def _authenticate_request(
239311
api_key_header: str,
240312
authenticator: APIKeyAuthenticator,
@@ -269,37 +341,6 @@ async def _authenticate_request(
269341
) from exc
270342

271343

272-
async def require_api_key(
273-
api_key_header: Annotated[str, Header(alias="X-API-Key")],
274-
authenticator: Annotated[APIKeyAuthenticator, Depends(get_authenticator)],
275-
) -> APIKeyRecord:
276-
"""Validate the user's API key and reserve a usage slot.
277-
278-
Parameters
279-
----------
280-
api_key_header : str
281-
API key supplied in the ``X-API-Key`` header.
282-
authenticator : APIKeyAuthenticator
283-
Authenticator responsible for validating and reserving usage.
284-
285-
Returns
286-
-------
287-
APIKeyRecord
288-
Updated API key record that includes the latest usage counter.
289-
290-
Raises
291-
------
292-
HTTPException
293-
Raised when the API key is invalid, inactive, or has exhausted its
294-
quota.
295-
"""
296-
return await _authenticate_request(
297-
api_key_header,
298-
authenticator,
299-
consume_usage=True,
300-
)
301-
302-
303344
async def require_api_key_without_consumption(
304345
api_key_header: Annotated[str, Header(alias="X-API-Key")],
305346
authenticator: Annotated[APIKeyAuthenticator, Depends(get_authenticator)],
@@ -405,7 +446,9 @@ async def call_gemini_with_retry(request: RequestBody) -> types.GenerateContentR
405446
)
406447
except RETRYABLE_EXCEPTIONS as exc:
407448
if attempt >= MAX_GEMINI_ATTEMPTS:
408-
logger.exception("Gemini request failed after retries")
449+
logger.exception(
450+
"Gemini request failed after %d retries", MAX_GEMINI_ATTEMPTS
451+
)
409452
raise HTTPException(
410453
status_code=status.HTTP_502_BAD_GATEWAY,
411454
detail="Gemini is currently unavailable",
@@ -444,25 +487,93 @@ async def health() -> dict[str, str]:
444487
async def _release_daily_usage(
    daily_usage: DailyUsageRepository,
    reservation,
    bucket: str,
) -> None:
    """Roll back a daily usage reservation without raising.

    A failed rollback is logged instead of propagated so that it cannot
    replace the error that triggered the rollback in the first place.
    """
    try:
        await daily_usage.release(reservation)
    except Exception:  # pragma: no cover - defensive logging for rollbacks
        logger.exception(
            "Failed to roll back daily usage for bucket %s",
            bucket,
        )


@router.post("/v1/grounding_with_search")
async def search(
    request: RequestBody,
    record: Annotated[
        APIKeyRecord,
        Depends(require_api_key_without_consumption),
    ],
    authenticator: Annotated[APIKeyAuthenticator, Depends(get_authenticator)],
    daily_usage: Annotated[
        DailyUsageRepository,
        Depends(get_daily_usage_repository),
    ],
) -> dict[str, object]:
    """Proxy Gemini grounding requests with quota enforcement.

    A daily usage slot is reserved for the model's bucket first. When that
    reservation did not consume the free allowance, the API key's own usage
    counter is consumed as well. Every reservation taken here is rolled back
    if a later step fails.

    Parameters
    ----------
    request : RequestBody
        Payload describing the Gemini call.
    record : APIKeyRecord
        API key record produced by ``require_api_key_without_consumption``.
    authenticator : APIKeyAuthenticator
        Authenticator used to consume API key usage and to roll it back on
        error.
    daily_usage : DailyUsageRepository
        Repository used to reserve and release the per-bucket daily usage.

    Returns
    -------
    dict of str to object
        JSON serialisable response returned by the Gemini model.

    Raises
    ------
    HTTPException
        ``403`` when the API key usage limit is exceeded, the key is inactive,
        or the key has expired; ``401`` when the key is not recognised.
        Exceptions raised by ``call_gemini_with_retry`` are re-raised after
        the rollbacks have been attempted.
    """
    bucket, free_limit = _resolve_usage_bucket(request.model)
    reservation = await daily_usage.reserve(bucket, free_limit)
    consumed_api_quota = False

    if not reservation.consumed_free:
        # The reservation did not use the free allowance, so charge the key.
        try:
            try:
                record = await authenticator.consume_usage(record.lookup_hash)
            except UsageLimitExceededError as exc:
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="API key usage limit exceeded",
                ) from exc
            except InvalidAPIKeyError as exc:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid API key provided",
                ) from exc
            except InactiveAPIKeyError as exc:
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="API key is inactive",
                ) from exc
            except ExpiredAPIKeyError as exc:
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="API key has expired",
                ) from exc
        except Exception:
            # Single rollback point: covers the mapped HTTP errors above and
            # any unexpected failure (e.g. a storage error), so the daily
            # reservation is never left dangling.
            await _release_daily_usage(daily_usage, reservation, bucket)
            raise

        consumed_api_quota = True

    try:
        response = await call_gemini_with_retry(request)
    except Exception:
        await _release_daily_usage(daily_usage, reservation, bucket)

        if consumed_api_quota:
            try:
                await authenticator.release_usage(record.lookup_hash)
            except Exception:  # pragma: no cover - defensive logging for rollbacks
                logger.exception(
                    "Failed to roll back usage for API key %s", record.lookup_hash
                )
        raise

    logger.info(
        "Gemini request completed for model %s (bucket=%s, consumed_free=%s)",
        request.model,
        bucket,
        reservation.consumed_free,
    )
    return response.to_json_dict()
467578

468579

src/utils/web_search/auth.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,58 @@ async def reserve_usage(
286286

287287
return record
288288

289+
async def consume_usage(self, lookup_hash: str) -> APIKeyRecord:
    """Increment the usage counter for a previously validated API key.

    Parameters
    ----------
    lookup_hash : str
        Lookup hash identifying the API key record.

    Returns
    -------
    APIKeyRecord
        Record returned by the repository after the counter update.

    Raises
    ------
    InvalidAPIKeyError
        If the repository reports no record for ``lookup_hash``.
    InactiveAPIKeyError
        If the record's status is not ``"active"``.
    ExpiredAPIKeyError
        If the record has an expiry time that the clock has reached.

    Notes
    -----
    Repository exceptions other than ``APIKeyNotFoundError`` are not handled
    here and propagate to the caller unchanged.
    """
    cached = self._cache_lookup(lookup_hash)
    if cached:
        record = cached
    else:
        # Cache miss: load from the repository and remember the result.
        try:
            record = await self._repository.get_api_key(lookup_hash)
        except APIKeyNotFoundError as exc:
            raise InvalidAPIKeyError("API key not recognised") from exc
        self._cache_store(record)

    if record.status != "active":
        raise InactiveAPIKeyError("API key has been suspended")

    expiry = record.expires_at
    if expiry and self._clock() >= expiry:
        # Drop the stale entry so the expired key is not served from cache.
        self._cache.pop(lookup_hash, None)
        raise ExpiredAPIKeyError("API key has expired")

    try:
        refreshed = await self._repository.update_usage_counter(lookup_hash)
    except APIKeyNotFoundError as exc:
        self._cache.pop(lookup_hash, None)
        raise InvalidAPIKeyError("API key not recognised") from exc
    else:
        self._cache_store(refreshed)
        return refreshed
315+
316+
async def release_usage(self, lookup_hash: str) -> APIKeyRecord:
    """Give back one usage slot by decrementing the key's usage counter.

    Parameters
    ----------
    lookup_hash : str
        Lookup hash corresponding to the API key whose usage should be
        decremented.

    Returns
    -------
    APIKeyRecord
        Record returned by the repository after the decrement; it is also
        written to the cache.

    Raises
    ------
    InvalidAPIKeyError
        If the repository reports no record for ``lookup_hash``; the cache
        entry for that hash is dropped first.
    """
    repository = self._repository
    try:
        rolled_back = await repository.decrement_usage_counter(lookup_hash)
    except APIKeyNotFoundError as exc:  # pragma: no cover - defensive branch
        self._cache.pop(lookup_hash, None)
        raise InvalidAPIKeyError("API key not recognised") from exc
    else:
        self._cache_store(rolled_back)
        return rolled_back
340+
289341
async def create_api_key(
290342
self,
291343
*,

0 commit comments

Comments
 (0)