Label HTTP request metrics by route instead of by raw request path.

REQUEST_DURATION and REQUESTS_TOTAL now take their `endpoint` label from
_extract_endpoint_label(): the `path` attribute of the object stored in
request.scope["route"] when that is set and non-empty, otherwise one of the
fixed placeholders "__non_api__" (path does not start with "/api/"),
"__not_found__" (response status is 404) or "__unmatched__".

Also adds a comment about label cardinality to get_prometheus_metrics().

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Leading whitespace and the blank context
lines were reconstructed to satisfy the hunk line counts; confirm them
against the target files before applying.

diff --git a/src/dstack/_internal/server/app.py b/src/dstack/_internal/server/app.py
index 527dd128f..488a5a9e0 100644
--- a/src/dstack/_internal/server/app.py
+++ b/src/dstack/_internal/server/app.py
@@ -306,19 +306,31 @@ def _extract_project_name(request: Request):
 
             return project_name
 
+        def _extract_endpoint_label(request: Request, response: Response) -> str:
+            route = request.scope.get("route")
+            route_path = getattr(route, "path", None)
+            if route_path:
+                return route_path
+            if not request.url.path.startswith("/api/"):
+                return "__non_api__"
+            if response.status_code == status.HTTP_404_NOT_FOUND:
+                return "__not_found__"
+            return "__unmatched__"
+
         project_name = _extract_project_name(request)
         response: Response = await call_next(request)
+        endpoint_label = _extract_endpoint_label(request, response)
 
         REQUEST_DURATION.labels(
             method=request.method,
-            endpoint=request.url.path,
+            endpoint=endpoint_label,
             http_status=response.status_code,
             project_name=project_name,
         ).observe(request.state.process_time)
 
         REQUESTS_TOTAL.labels(
             method=request.method,
-            endpoint=request.url.path,
+            endpoint=endpoint_label,
             http_status=response.status_code,
             project_name=project_name,
         ).inc()
diff --git a/src/dstack/_internal/server/routers/prometheus.py b/src/dstack/_internal/server/routers/prometheus.py
index a5538edfe..da0115eb7 100644
--- a/src/dstack/_internal/server/routers/prometheus.py
+++ b/src/dstack/_internal/server/routers/prometheus.py
@@ -25,6 +25,9 @@
 async def get_prometheus_metrics(
     session: Annotated[AsyncSession, Depends(get_session)],
 ) -> str:
+    # Note: Prometheus warns against storing high cardinality values in labels,
+    # yet both client and custom metrics have labels like project, run, fleet, etc.
+    # This may require a very big Prometheus server with lots of storage.
     if not settings.ENABLE_PROMETHEUS_METRICS:
         raise error_not_found()
     custom_metrics_ = await custom_metrics.get_metrics(session=session)