-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
feat(uptime): Add organization uptime summary endpoint #96710
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
evanpurkhiser
merged 1 commit into
master
from
evanpurkhiser/feat-uptime-add-organization-uptime-percentiles-endpoint
Jul 30, 2025
+857
−46
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
287 changes: 287 additions & 0 deletions
287
src/sentry/uptime/endpoints/organization_uptime_summary.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,287 @@ | ||
import logging | ||
import uuid | ||
from datetime import datetime | ||
|
||
from drf_spectacular.utils import extend_schema | ||
from google.protobuf.timestamp_pb2 import Timestamp | ||
from rest_framework.request import Request | ||
from rest_framework.response import Response | ||
from sentry_kafka_schemas.schema_types.uptime_results_v1 import ( | ||
CHECKSTATUS_FAILURE, | ||
CHECKSTATUS_MISSED_WINDOW, | ||
) | ||
from sentry_protos.snuba.v1.attribute_conditional_aggregation_pb2 import ( | ||
AttributeConditionalAggregation, | ||
) | ||
from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig | ||
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import ( | ||
Column, | ||
TraceItemTableRequest, | ||
TraceItemTableResponse, | ||
) | ||
from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemType | ||
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import ( | ||
AttributeAggregation, | ||
AttributeKey, | ||
AttributeValue, | ||
Function, | ||
StrArray, | ||
) | ||
from sentry_protos.snuba.v1.trace_item_filter_pb2 import ( | ||
AndFilter, | ||
ComparisonFilter, | ||
TraceItemFilter, | ||
) | ||
|
||
from sentry import features | ||
from sentry.api.api_owners import ApiOwner | ||
from sentry.api.api_publish_status import ApiPublishStatus | ||
from sentry.api.base import region_silo_endpoint | ||
from sentry.api.bases.organization import OrganizationEndpoint, OrganizationPermission | ||
from sentry.api.serializers import serialize | ||
from sentry.api.utils import get_date_range_from_params | ||
from sentry.models.organization import Organization | ||
from sentry.models.project import Project | ||
from sentry.uptime.endpoints.utils import ( | ||
MAX_UPTIME_SUBSCRIPTION_IDS, | ||
authorize_and_map_project_uptime_subscription_ids, | ||
) | ||
from sentry.uptime.types import IncidentStatus, UptimeSummary | ||
from sentry.utils.snuba_rpc import table_rpc | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@region_silo_endpoint
@extend_schema(tags=["Uptime Monitors"])
class OrganizationUptimeSummaryEndpoint(OrganizationEndpoint):
    """
    Experimental endpoint that returns aggregated uptime check counts for a
    set of project uptime subscriptions.

    For every requested ``projectUptimeSubscriptionId`` that has data in the
    requested date range, the response contains a serialized ``UptimeSummary``
    with the total, failed, downtime and missed-window check counts.
    """

    publish_status = {
        "GET": ApiPublishStatus.EXPERIMENTAL,
    }
    owner = ApiOwner.CRONS
    permission_classes = (OrganizationPermission,)

    def get(self, request: Request, organization: Organization) -> Response:
        """
        Return uptime summaries keyed by project uptime subscription id.

        Query parameters read here:
            projectUptimeSubscriptionId: repeated; at least one and at most
                ``MAX_UPTIME_SUBSCRIPTION_IDS`` values are accepted.
            The date range is parsed from ``request.GET`` by
            ``get_date_range_from_params``.

        Responds with status 400 when no ids or too many ids are supplied,
        when the ids fail authorization/parsing (``ValueError``), or when the
        EAP request or its formatting raises.
        """
        start, end = get_date_range_from_params(request.GET)
        projects = self.get_projects(request, organization, include_all_accessible=True)

        project_uptime_subscription_ids = request.GET.getlist("projectUptimeSubscriptionId")

        if not project_uptime_subscription_ids:
            return self.respond("No project uptime subscription ids provided", status=400)

        if len(project_uptime_subscription_ids) > MAX_UPTIME_SUBSCRIPTION_IDS:
            return self.respond(
                f"Too many project uptime subscription ids provided. Maximum is {MAX_UPTIME_SUBSCRIPTION_IDS}",
                status=400,
            )

        use_eap_results = features.has(
            "organizations:uptime-eap-uptime-results-query", organization, actor=request.user
        )

        # XXX: We need to query these using hex, since we store them without dashes.
        # We remove this once we remove the old uptime checks
        def format_subscription_id(sub_id: str) -> str:
            parsed = uuid.UUID(sub_id)
            return parsed.hex if use_eap_results else str(parsed)

        try:
            subscription_id_to_project_uptime_subscription_id, subscription_ids = (
                authorize_and_map_project_uptime_subscription_ids(
                    project_uptime_subscription_ids, projects, format_subscription_id
                )
            )
        except ValueError:
            return self.respond("Invalid project uptime subscription ids provided", status=400)

        # The two storage backends differ only in the trace item type and in
        # the attribute that holds the subscription id.
        if use_eap_results:
            trace_item_type = TraceItemType.TRACE_ITEM_TYPE_UPTIME_RESULT
            subscription_key = "subscription_id"
        else:
            trace_item_type = TraceItemType.TRACE_ITEM_TYPE_UPTIME_CHECK
            subscription_key = "uptime_subscription_id"

        try:
            eap_response = self._make_eap_request(
                organization,
                projects,
                subscription_ids,
                start,
                end,
                trace_item_type,
                subscription_key,
            )
            formatted_response = self._format_response(eap_response)
        except Exception:
            # NOTE(review): this is a server-side failure but is reported to the
            # client as a 400; kept as-is because callers may depend on it.
            logger.exception("Error making EAP RPC request for uptime check summary")
            return self.respond("error making request", status=400)

        # Map the response back to project uptime subscription ids
        mapped_response = self._map_response_to_project_uptime_subscription_ids(
            subscription_id_to_project_uptime_subscription_id, formatted_response
        )

        # Serialize the UptimeSummary objects
        serialized_response = {
            project_uptime_subscription_id: serialize(summary, request.user)
            for project_uptime_subscription_id, summary in mapped_response.items()
        }

        return self.respond(serialized_response)

    def _make_eap_request(
        self,
        organization: Organization,
        projects: list[Project],
        subscription_ids: list[str],
        start: datetime,
        end: datetime,
        trace_item_type: TraceItemType.ValueType,
        subscription_key: str,
    ) -> TraceItemTableResponse:
        """
        Issue a single table RPC that counts checks per subscription id.

        The request filters on ``subscription_key IN subscription_ids``, groups
        by that same attribute and asks for these columns:

        - ``uptime_subscription_id``: the grouped subscription id
        - ``total_checks``: count of all matching items
        - ``failed_checks``: failures with ``IncidentStatus.NO_INCIDENT``
        - ``downtime_checks``: failures with ``IncidentStatus.IN_INCIDENT``
        - ``missed_window_checks``: items whose status is missed-window

        Raises:
            RuntimeError: if the RPC layer does not return exactly one response
                for the one request sent.
        """
        start_timestamp = Timestamp()
        start_timestamp.FromDatetime(start)
        end_timestamp = Timestamp()
        end_timestamp.FromDatetime(end)

        subscription_attribute_key = AttributeKey(
            name=subscription_key,
            type=AttributeKey.Type.TYPE_STRING,
        )

        query_filter = TraceItemFilter(
            comparison_filter=ComparisonFilter(
                key=subscription_attribute_key,
                op=ComparisonFilter.OP_IN,
                value=AttributeValue(val_str_array=StrArray(values=subscription_ids)),
            )
        )

        def check_status_filter(check_status: str) -> TraceItemFilter:
            # Matches items whose "check_status" attribute equals the given value.
            return TraceItemFilter(
                comparison_filter=ComparisonFilter(
                    key=AttributeKey(name="check_status", type=AttributeKey.Type.TYPE_STRING),
                    op=ComparisonFilter.OP_EQUALS,
                    value=AttributeValue(val_str=check_status),
                )
            )

        def failure_filter(incident_status: IncidentStatus) -> TraceItemFilter:
            # Matches failed checks that carry the given incident status.
            incident_filter = TraceItemFilter(
                comparison_filter=ComparisonFilter(
                    key=AttributeKey(name="incident_status", type=AttributeKey.Type.TYPE_INT),
                    op=ComparisonFilter.OP_EQUALS,
                    value=AttributeValue(val_int=incident_status.value),
                )
            )
            return TraceItemFilter(
                and_filter=AndFilter(
                    filters=[check_status_filter(CHECKSTATUS_FAILURE), incident_filter]
                )
            )

        def conditional_count(label: str, condition: TraceItemFilter) -> Column:
            # Counts only the items that satisfy ``condition``.
            return Column(
                label=label,
                conditional_aggregation=AttributeConditionalAggregation(
                    aggregate=Function.FUNCTION_COUNT,
                    key=subscription_attribute_key,
                    filter=condition,
                ),
            )

        columns: list[Column] = [
            # The label is fixed regardless of ``subscription_key`` because
            # ``_format_response`` looks the id up under this name.
            Column(label="uptime_subscription_id", key=subscription_attribute_key),
            Column(
                label="total_checks",
                aggregation=AttributeAggregation(
                    aggregate=Function.FUNCTION_COUNT,
                    key=subscription_attribute_key,
                    label="count()",
                ),
            ),
            conditional_count(
                "failed_checks", failure_filter(incident_status=IncidentStatus.NO_INCIDENT)
            ),
            conditional_count(
                "downtime_checks", failure_filter(incident_status=IncidentStatus.IN_INCIDENT)
            ),
            conditional_count(
                "missed_window_checks", check_status_filter(CHECKSTATUS_MISSED_WINDOW)
            ),
        ]

        request = TraceItemTableRequest(
            meta=RequestMeta(
                organization_id=organization.id,
                project_ids=[project.id for project in projects],
                trace_item_type=trace_item_type,
                start_timestamp=start_timestamp,
                end_timestamp=end_timestamp,
                downsampled_storage_config=DownsampledStorageConfig(
                    mode=DownsampledStorageConfig.MODE_HIGHEST_ACCURACY
                ),
            ),
            group_by=[subscription_attribute_key],
            filter=query_filter,
            columns=columns,
        )
        responses = table_rpc([request])
        # Explicit check rather than ``assert`` so it still runs under ``-O``.
        if len(responses) != 1:
            raise RuntimeError(
                f"Expected exactly one table RPC response, got {len(responses)}"
            )
        return responses[0]

    def _format_response(self, response: TraceItemTableResponse) -> dict[str, UptimeSummary]:
        """
        Formats the response from the EAP RPC request into a dictionary mapping
        subscription ids to UptimeSummary

        The response is column-oriented; rows are rebuilt by walking all
        columns in lockstep (assumes every column carries one result per row).
        An empty response yields an empty dictionary.
        """
        column_values = response.column_values
        if not column_values:
            return {}

        column_names = [column.attribute_name for column in column_values]
        formatted_data: dict[str, UptimeSummary] = {}

        for row in zip(*(column.results for column in column_values)):
            row_dict: dict[str, AttributeValue] = dict(zip(column_names, row))

            # Counts are read from ``val_double`` and converted to int.
            summary_stats = UptimeSummary(
                total_checks=int(row_dict["total_checks"].val_double),
                failed_checks=int(row_dict["failed_checks"].val_double),
                downtime_checks=int(row_dict["downtime_checks"].val_double),
                missed_window_checks=int(row_dict["missed_window_checks"].val_double),
            )

            subscription_id = row_dict["uptime_subscription_id"].val_str
            formatted_data[subscription_id] = summary_stats

        return formatted_data

    def _map_response_to_project_uptime_subscription_ids(
        self,
        subscription_id_to_project_uptime_subscription_id: dict[str, int],
        formatted_response: dict[str, UptimeSummary],
    ) -> dict[int, UptimeSummary]:
        """
        Map the response back to project uptime subscription ids

        Raises:
            KeyError: if the response contains a subscription id that is not
                present in the supplied mapping.
        """
        return {
            subscription_id_to_project_uptime_subscription_id[subscription_id]: summary
            for subscription_id, summary in formatted_response.items()
        }
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.