From a2d5993d2f36ff471727d460f8382f5d78b0332b Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 12 Jan 2026 14:45:46 +0500 Subject: [PATCH 1/2] Use the same metrics endpoint label for 404 requests --- src/dstack/_internal/server/app.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/dstack/_internal/server/app.py b/src/dstack/_internal/server/app.py index 527dd128f..488a5a9e0 100644 --- a/src/dstack/_internal/server/app.py +++ b/src/dstack/_internal/server/app.py @@ -306,19 +306,31 @@ def _extract_project_name(request: Request): return project_name + def _extract_endpoint_label(request: Request, response: Response) -> str: + route = request.scope.get("route") + route_path = getattr(route, "path", None) + if route_path: + return route_path + if not request.url.path.startswith("/api/"): + return "__non_api__" + if response.status_code == status.HTTP_404_NOT_FOUND: + return "__not_found__" + return "__unmatched__" + project_name = _extract_project_name(request) response: Response = await call_next(request) + endpoint_label = _extract_endpoint_label(request, response) REQUEST_DURATION.labels( method=request.method, - endpoint=request.url.path, + endpoint=endpoint_label, http_status=response.status_code, project_name=project_name, ).observe(request.state.process_time) REQUESTS_TOTAL.labels( method=request.method, - endpoint=request.url.path, + endpoint=endpoint_label, http_status=response.status_code, project_name=project_name, ).inc() From 14d1eb8618ee2ddab0274b82c56db5ed917785a5 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 12 Jan 2026 15:26:41 +0500 Subject: [PATCH 2/2] Leave comment on high cardinality labels --- src/dstack/_internal/server/routers/prometheus.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/dstack/_internal/server/routers/prometheus.py b/src/dstack/_internal/server/routers/prometheus.py index a5538edfe..da0115eb7 100644 --- a/src/dstack/_internal/server/routers/prometheus.py +++ b/src/dstack/_internal/server/routers/prometheus.py @@ -25,6 +25,9 @@ async def get_prometheus_metrics( session: Annotated[AsyncSession, Depends(get_session)], ) -> str: + # Note: Prometheus warns against storing high cardinality values in labels, + # yet both client and custom metrics have labels like project, run, fleet, etc. + # This may require a very big Prometheus server with lots of storage. if not settings.ENABLE_PROMETHEUS_METRICS: raise error_not_found() custom_metrics_ = await custom_metrics.get_metrics(session=session)