Skip to content

Commit 00b6300

Browse files
evanpurkhiser and andrewshie-sentry
authored and committed
feat(uptime): Add organization uptime summary endpoint (#96710)
- Add `OrganizationUptimeSummaryEndpoint` for returning some statistics about the uptime monitors. Right now it returns counts we can use to calculate uptime percentages.
- Extract common utility function for subscription ID authorization and mapping.
- Support both EAP Item results and the legacy EAP checks table.
1 parent 4f2cb5e commit 00b6300

File tree

8 files changed

+857
-46
lines changed

8 files changed

+857
-46
lines changed

src/sentry/api/urls.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,7 @@
419419
OrganizationUptimeAlertIndexCountEndpoint,
420420
)
421421
from sentry.uptime.endpoints.organization_uptime_stats import OrganizationUptimeStatsEndpoint
422+
from sentry.uptime.endpoints.organization_uptime_summary import OrganizationUptimeSummaryEndpoint
422423
from sentry.uptime.endpoints.project_uptime_alert_checks_index import (
423424
ProjectUptimeAlertCheckIndexEndpoint,
424425
)
@@ -2434,6 +2435,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
24342435
OrganizationUptimeStatsEndpoint.as_view(),
24352436
name="sentry-api-0-organization-uptime-stats",
24362437
),
2438+
re_path(
2439+
r"^(?P<organization_id_or_slug>[^/]+)/uptime-summary/$",
2440+
OrganizationUptimeSummaryEndpoint.as_view(),
2441+
name="sentry-api-0-organization-uptime-summary",
2442+
),
24372443
re_path(
24382444
r"^(?P<organization_id_or_slug>[^/]+)/insights/tree/$",
24392445
OrganizationInsightsTreeEndpoint.as_view(),

src/sentry/uptime/endpoints/organization_uptime_stats.py

Lines changed: 5 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import logging
33
import uuid
44
from collections import defaultdict
5-
from collections.abc import Callable
65

76
from drf_spectacular.utils import extend_schema
87
from google.protobuf.timestamp_pb2 import Timestamp
@@ -31,16 +30,16 @@
3130
from sentry.api.bases.organization import OrganizationEndpoint, OrganizationPermission
3231
from sentry.models.organization import Organization
3332
from sentry.models.project import Project
34-
from sentry.uptime.models import ProjectUptimeSubscription
33+
from sentry.uptime.endpoints.utils import (
34+
MAX_UPTIME_SUBSCRIPTION_IDS,
35+
authorize_and_map_project_uptime_subscription_ids,
36+
)
3537
from sentry.uptime.types import IncidentStatus
3638
from sentry.utils.snuba_rpc import timeseries_rpc
3739

3840
logger = logging.getLogger(__name__)
3941

4042

41-
MAX_UPTIME_SUBSCRIPTION_IDS = 100
42-
43-
4443
@region_silo_endpoint
4544
@extend_schema(tags=["Uptime Monitors"])
4645
class OrganizationUptimeStatsEndpoint(OrganizationEndpoint, StatsMixin):
@@ -78,7 +77,7 @@ def get(self, request: Request, organization: Organization) -> Response:
7877
subscription_id_formatter = lambda sub_id: str(uuid.UUID(sub_id))
7978

8079
subscription_id_to_project_uptime_subscription_id, subscription_ids = (
81-
self._authorize_and_map_project_uptime_subscription_ids(
80+
authorize_and_map_project_uptime_subscription_ids(
8281
project_uptime_subscription_ids, projects, subscription_id_formatter
8382
)
8483
)
@@ -135,45 +134,6 @@ def get(self, request: Request, organization: Organization) -> Response:
135134

136135
return self.respond(response_with_extra_buckets)
137136

138-
def _authorize_and_map_project_uptime_subscription_ids(
    self,
    project_uptime_subscription_ids: list[str],
    projects: list[Project],
    sub_id_formatter: Callable[[str], str],
) -> tuple[dict[str, int], list[str]]:
    """
    Validate the requested project uptime subscription ids against the given
    projects and translate them into formatted subscription ids.

    The project uptime subscription id is not stored in snuba, so callers
    have to query by subscription id and map the results back afterwards.

    :param project_uptime_subscription_ids: ids as received from the request;
        each one is converted with ``int()``.
    :param projects: projects the lookup is restricted to.
    :param sub_id_formatter: applied to every non-null subscription id before
        it is used as a mapping key or returned.
    :returns: ``(formatted subscription id -> project uptime subscription id,
        list of formatted subscription ids)``.
    :raises ValueError: if an id is not an integer, or if the set of requested
        ids differs from the set of ids found within ``projects``.
    """
    requested_ids = [int(raw_id) for raw_id in project_uptime_subscription_ids]

    # Rows are (project uptime subscription id, subscription id) pairs.
    rows = ProjectUptimeSubscription.objects.filter(
        project_id__in=[project.id for project in projects],
        id__in=requested_ids,
    ).values_list("id", "uptime_subscription__subscription_id")

    found_ids = {row_id for row_id, _ in rows if row_id is not None}
    if found_ids != set(requested_ids):
        # At least one requested id is missing from the accessible projects.
        raise ValueError("Invalid project uptime subscription ids provided")

    subscription_id_to_project_uptime_subscription_id = {
        sub_id_formatter(subscription_id): row_id
        for row_id, subscription_id in rows
        if row_id is not None and subscription_id is not None
    }

    validated_subscription_ids = [
        sub_id_formatter(subscription_id)
        for _, subscription_id in rows
        if subscription_id is not None
    ]

    return subscription_id_to_project_uptime_subscription_id, validated_subscription_ids
176-
177137
def _make_eap_request(
178138
self,
179139
organization: Organization,
src/sentry/uptime/endpoints/organization_uptime_summary.py

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
import logging
2+
import uuid
3+
from datetime import datetime
4+
5+
from drf_spectacular.utils import extend_schema
6+
from google.protobuf.timestamp_pb2 import Timestamp
7+
from rest_framework.request import Request
8+
from rest_framework.response import Response
9+
from sentry_kafka_schemas.schema_types.uptime_results_v1 import (
10+
CHECKSTATUS_FAILURE,
11+
CHECKSTATUS_MISSED_WINDOW,
12+
)
13+
from sentry_protos.snuba.v1.attribute_conditional_aggregation_pb2 import (
14+
AttributeConditionalAggregation,
15+
)
16+
from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig
17+
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import (
18+
Column,
19+
TraceItemTableRequest,
20+
TraceItemTableResponse,
21+
)
22+
from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemType
23+
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
24+
AttributeAggregation,
25+
AttributeKey,
26+
AttributeValue,
27+
Function,
28+
StrArray,
29+
)
30+
from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
31+
AndFilter,
32+
ComparisonFilter,
33+
TraceItemFilter,
34+
)
35+
36+
from sentry import features
37+
from sentry.api.api_owners import ApiOwner
38+
from sentry.api.api_publish_status import ApiPublishStatus
39+
from sentry.api.base import region_silo_endpoint
40+
from sentry.api.bases.organization import OrganizationEndpoint, OrganizationPermission
41+
from sentry.api.serializers import serialize
42+
from sentry.api.utils import get_date_range_from_params
43+
from sentry.models.organization import Organization
44+
from sentry.models.project import Project
45+
from sentry.uptime.endpoints.utils import (
46+
MAX_UPTIME_SUBSCRIPTION_IDS,
47+
authorize_and_map_project_uptime_subscription_ids,
48+
)
49+
from sentry.uptime.types import IncidentStatus, UptimeSummary
50+
from sentry.utils.snuba_rpc import table_rpc
51+
52+
logger = logging.getLogger(__name__)
53+
54+
55+
@region_silo_endpoint
@extend_schema(tags=["Uptime Monitors"])
class OrganizationUptimeSummaryEndpoint(OrganizationEndpoint):
    """
    Experimental endpoint returning check counts per project uptime
    subscription for a date range.

    For every requested ``projectUptimeSubscriptionId`` that has rows in the
    queried window, the response contains the total number of checks, the
    failed checks outside an incident, the failed checks during an incident
    (downtime) and the checks that missed their window.
    """

    publish_status = {
        "GET": ApiPublishStatus.EXPERIMENTAL,
    }
    owner = ApiOwner.CRONS
    permission_classes = (OrganizationPermission,)

    def get(self, request: Request, organization: Organization) -> Response:
        """
        Return a mapping of project uptime subscription id to its serialized
        ``UptimeSummary``.

        Query parameters read here: the date range understood by
        ``get_date_range_from_params`` and one or more
        ``projectUptimeSubscriptionId`` values (at most
        ``MAX_UPTIME_SUBSCRIPTION_IDS``).

        Responds with status 400 when no ids or too many ids are given, when
        the ids fail authorization/parsing, or when the EAP request fails.
        """
        start, end = get_date_range_from_params(request.GET)
        projects = self.get_projects(request, organization, include_all_accessible=True)

        project_uptime_subscription_ids = request.GET.getlist("projectUptimeSubscriptionId")

        if not project_uptime_subscription_ids:
            return self.respond("No project uptime subscription ids provided", status=400)

        if len(project_uptime_subscription_ids) > MAX_UPTIME_SUBSCRIPTION_IDS:
            return self.respond(
                f"Too many project uptime subscription ids provided. Maximum is {MAX_UPTIME_SUBSCRIPTION_IDS}",
                status=400,
            )

        use_eap_results = features.has(
            "organizations:uptime-eap-uptime-results-query", organization, actor=request.user
        )

        # The two storage backends differ in item type, in the attribute that
        # holds the subscription id, and in how that id is formatted.
        if use_eap_results:
            trace_item_type = TraceItemType.TRACE_ITEM_TYPE_UPTIME_RESULT
            subscription_key = "subscription_id"
        else:
            trace_item_type = TraceItemType.TRACE_ITEM_TYPE_UPTIME_CHECK
            subscription_key = "uptime_subscription_id"

        def format_subscription_id(sub_id: str) -> str:
            # XXX: We need to query these using hex, since we store them without dashes.
            # We remove this once we remove the old uptime checks
            parsed = uuid.UUID(sub_id)
            return parsed.hex if use_eap_results else str(parsed)

        try:
            subscription_id_to_project_uptime_subscription_id, subscription_ids = (
                authorize_and_map_project_uptime_subscription_ids(
                    project_uptime_subscription_ids, projects, format_subscription_id
                )
            )
        except ValueError:
            return self.respond("Invalid project uptime subscription ids provided", status=400)

        try:
            eap_response = self._make_eap_request(
                organization,
                projects,
                subscription_ids,
                start,
                end,
                trace_item_type,
                subscription_key,
            )
            formatted_response = self._format_response(eap_response)
        except Exception:
            # NOTE(review): a backend failure is reported to the client as a
            # 400; kept as-is to preserve the existing API behavior.
            logger.exception("Error making EAP RPC request for uptime check summary")
            return self.respond("error making request", status=400)

        # Map the response back to project uptime subscription ids
        mapped_response = self._map_response_to_project_uptime_subscription_ids(
            subscription_id_to_project_uptime_subscription_id, formatted_response
        )

        # Serialize the UptimeSummary objects
        serialized_response = {
            project_uptime_subscription_id: serialize(summary, request.user)
            for project_uptime_subscription_id, summary in mapped_response.items()
        }

        return self.respond(serialized_response)

    def _make_eap_request(
        self,
        organization: Organization,
        projects: list[Project],
        subscription_ids: list[str],
        start: datetime,
        end: datetime,
        trace_item_type: TraceItemType.ValueType,
        subscription_key: str,
    ) -> TraceItemTableResponse:
        """
        Build and send a single ``TraceItemTableRequest`` that groups by the
        subscription attribute and counts checks per subscription.

        :param subscription_ids: already formatted ids used in the ``IN`` filter.
        :param trace_item_type: item type to query.
        :param subscription_key: name of the string attribute holding the
            subscription id for that item type.
        :returns: the one response produced for the one request sent.
        :raises ValueError: if ``table_rpc`` does not return exactly one response.
        """
        start_timestamp = Timestamp()
        start_timestamp.FromDatetime(start)
        end_timestamp = Timestamp()
        end_timestamp.FromDatetime(end)

        subscription_attribute_key = AttributeKey(
            name=subscription_key,
            type=AttributeKey.Type.TYPE_STRING,
        )

        query_filter = TraceItemFilter(
            comparison_filter=ComparisonFilter(
                key=subscription_attribute_key,
                op=ComparisonFilter.OP_IN,
                value=AttributeValue(val_str_array=StrArray(values=subscription_ids)),
            )
        )

        def check_status_filter(check_status: str) -> TraceItemFilter:
            # Matches rows whose ``check_status`` attribute equals the given value.
            return TraceItemFilter(
                comparison_filter=ComparisonFilter(
                    key=AttributeKey(name="check_status", type=AttributeKey.Type.TYPE_STRING),
                    op=ComparisonFilter.OP_EQUALS,
                    value=AttributeValue(val_str=check_status),
                )
            )

        def failure_filter(incident_status: IncidentStatus) -> TraceItemFilter:
            # Failed checks, restricted to the given incident status.
            incident_filter = TraceItemFilter(
                comparison_filter=ComparisonFilter(
                    key=AttributeKey(name="incident_status", type=AttributeKey.Type.TYPE_INT),
                    op=ComparisonFilter.OP_EQUALS,
                    value=AttributeValue(val_int=incident_status.value),
                )
            )
            return TraceItemFilter(
                and_filter=AndFilter(
                    filters=[check_status_filter(CHECKSTATUS_FAILURE), incident_filter]
                )
            )

        def conditional_count(label: str, condition: TraceItemFilter) -> Column:
            # Count of rows per subscription that satisfy ``condition``.
            return Column(
                label=label,
                conditional_aggregation=AttributeConditionalAggregation(
                    aggregate=Function.FUNCTION_COUNT,
                    key=subscription_attribute_key,
                    filter=condition,
                ),
            )

        columns: list[Column] = [
            Column(label="uptime_subscription_id", key=subscription_attribute_key),
            Column(
                label="total_checks",
                aggregation=AttributeAggregation(
                    aggregate=Function.FUNCTION_COUNT,
                    key=subscription_attribute_key,
                    label="count()",
                ),
            ),
            conditional_count(
                "failed_checks", failure_filter(incident_status=IncidentStatus.NO_INCIDENT)
            ),
            conditional_count(
                "downtime_checks", failure_filter(incident_status=IncidentStatus.IN_INCIDENT)
            ),
            conditional_count(
                "missed_window_checks", check_status_filter(CHECKSTATUS_MISSED_WINDOW)
            ),
        ]

        request = TraceItemTableRequest(
            meta=RequestMeta(
                organization_id=organization.id,
                project_ids=[project.id for project in projects],
                trace_item_type=trace_item_type,
                start_timestamp=start_timestamp,
                end_timestamp=end_timestamp,
                downsampled_storage_config=DownsampledStorageConfig(
                    mode=DownsampledStorageConfig.MODE_HIGHEST_ACCURACY
                ),
            ),
            group_by=[subscription_attribute_key],
            filter=query_filter,
            columns=columns,
        )

        # Unpacking enforces "exactly one response" even when asserts are
        # disabled (python -O); a mismatch raises ValueError.
        (response,) = table_rpc([request])
        return response

    def _format_response(self, response: TraceItemTableResponse) -> dict[str, UptimeSummary]:
        """
        Formats the response from the EAP RPC request into a dictionary mapping
        subscription ids to UptimeSummary
        """
        column_values = response.column_values
        if not column_values:
            return {}

        formatted_data: dict[str, UptimeSummary] = {}

        # The response is column-oriented; the first column defines the row count.
        for row_idx in range(len(column_values[0].results)):
            row: dict[str, AttributeValue] = {
                column.attribute_name: column.results[row_idx] for column in column_values
            }

            subscription_id = row["uptime_subscription_id"].val_str
            # Counts are read from ``val_double`` and truncated to int.
            formatted_data[subscription_id] = UptimeSummary(
                total_checks=int(row["total_checks"].val_double),
                failed_checks=int(row["failed_checks"].val_double),
                downtime_checks=int(row["downtime_checks"].val_double),
                missed_window_checks=int(row["missed_window_checks"].val_double),
            )

        return formatted_data

    def _map_response_to_project_uptime_subscription_ids(
        self,
        subscription_id_to_project_uptime_subscription_id: dict[str, int],
        formatted_response: dict[str, UptimeSummary],
    ) -> dict[int, UptimeSummary]:
        """
        Map the response back to project uptime subscription ids

        Rows whose subscription id is not present in the mapping are skipped
        (and logged) instead of raising ``KeyError``, since this runs outside
        the request's error handling.
        """
        mapped: dict[int, UptimeSummary] = {}
        for subscription_id, data in formatted_response.items():
            project_uptime_subscription_id = (
                subscription_id_to_project_uptime_subscription_id.get(subscription_id)
            )
            if project_uptime_subscription_id is None:
                logger.warning(
                    "Uptime summary row has unknown subscription id %s", subscription_id
                )
                continue
            mapped[project_uptime_subscription_id] = data
        return mapped

0 commit comments

Comments
 (0)