Skip to content

feat(uptime): Add organization uptime summary endpoint #96710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/sentry/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@
OrganizationUptimeAlertIndexCountEndpoint,
)
from sentry.uptime.endpoints.organization_uptime_stats import OrganizationUptimeStatsEndpoint
from sentry.uptime.endpoints.organization_uptime_summary import OrganizationUptimeSummaryEndpoint
from sentry.uptime.endpoints.project_uptime_alert_checks_index import (
ProjectUptimeAlertCheckIndexEndpoint,
)
Expand Down Expand Up @@ -2426,6 +2427,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
OrganizationUptimeStatsEndpoint.as_view(),
name="sentry-api-0-organization-uptime-stats",
),
re_path(
r"^(?P<organization_id_or_slug>[^/]+)/uptime-summary/$",
OrganizationUptimeSummaryEndpoint.as_view(),
name="sentry-api-0-organization-uptime-summary",
),
re_path(
r"^(?P<organization_id_or_slug>[^/]+)/insights/tree/$",
OrganizationInsightsTreeEndpoint.as_view(),
Expand Down
50 changes: 5 additions & 45 deletions src/sentry/uptime/endpoints/organization_uptime_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
import uuid
from collections import defaultdict
from collections.abc import Callable

from drf_spectacular.utils import extend_schema
from google.protobuf.timestamp_pb2 import Timestamp
Expand Down Expand Up @@ -31,16 +30,16 @@
from sentry.api.bases.organization import OrganizationEndpoint, OrganizationPermission
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.uptime.models import ProjectUptimeSubscription
from sentry.uptime.endpoints.utils import (
MAX_UPTIME_SUBSCRIPTION_IDS,
authorize_and_map_project_uptime_subscription_ids,
)
from sentry.uptime.types import IncidentStatus
from sentry.utils.snuba_rpc import timeseries_rpc

logger = logging.getLogger(__name__)


MAX_UPTIME_SUBSCRIPTION_IDS = 100


@region_silo_endpoint
@extend_schema(tags=["Uptime Monitors"])
class OrganizationUptimeStatsEndpoint(OrganizationEndpoint, StatsMixin):
Expand Down Expand Up @@ -78,7 +77,7 @@ def get(self, request: Request, organization: Organization) -> Response:
subscription_id_formatter = lambda sub_id: str(uuid.UUID(sub_id))

subscription_id_to_project_uptime_subscription_id, subscription_ids = (
self._authorize_and_map_project_uptime_subscription_ids(
authorize_and_map_project_uptime_subscription_ids(
project_uptime_subscription_ids, projects, subscription_id_formatter
)
)
Expand Down Expand Up @@ -135,45 +134,6 @@ def get(self, request: Request, organization: Organization) -> Response:

return self.respond(response_with_extra_buckets)

def _authorize_and_map_project_uptime_subscription_ids(
    self,
    project_uptime_subscription_ids: list[str],
    projects: list[Project],
    sub_id_formatter: Callable[[str], str],
) -> tuple[dict[str, int], list[str]]:
    """
    Check that every requested project uptime subscription id belongs to one
    of the given projects, and translate those ids to subscription ids.

    Snuba does not store the project uptime subscription id, so queries have
    to be made with the underlying subscription id instead.

    Returns a pair of:
      - a dict from formatted subscription id to project uptime subscription id
      - the list of formatted subscription ids to query with

    Raises ValueError when an id is not an integer, or when the set of ids
    found in the given projects differs from the set that was requested.
    """
    requested_ids = [int(raw_id) for raw_id in project_uptime_subscription_ids]

    # Each row is an (id, uptime_subscription__subscription_id) tuple.
    rows = ProjectUptimeSubscription.objects.filter(
        project_id__in=[project.id for project in projects],
        id__in=requested_ids,
    ).values_list("id", "uptime_subscription__subscription_id")

    # Authorization: every requested id must have been found in the projects.
    found_ids = {found_id for found_id, _ in rows if found_id is not None}
    if found_ids != set(requested_ids):
        raise ValueError("Invalid project uptime subscription ids provided")

    subscription_id_to_project_uptime_subscription_id: dict[str, int] = {}
    for found_id, subscription_id in rows:
        if found_id is None or subscription_id is None:
            continue
        subscription_id_to_project_uptime_subscription_id[
            sub_id_formatter(subscription_id)
        ] = found_id

    validated_subscription_ids = [
        sub_id_formatter(subscription_id)
        for _, subscription_id in rows
        if subscription_id is not None
    ]

    return subscription_id_to_project_uptime_subscription_id, validated_subscription_ids

def _make_eap_request(
self,
organization: Organization,
Expand Down
287 changes: 287 additions & 0 deletions src/sentry/uptime/endpoints/organization_uptime_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
import logging
import uuid
from datetime import datetime

from drf_spectacular.utils import extend_schema
from google.protobuf.timestamp_pb2 import Timestamp
from rest_framework.request import Request
from rest_framework.response import Response
from sentry_kafka_schemas.schema_types.uptime_results_v1 import (
CHECKSTATUS_FAILURE,
CHECKSTATUS_MISSED_WINDOW,
)
from sentry_protos.snuba.v1.attribute_conditional_aggregation_pb2 import (
AttributeConditionalAggregation,
)
from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import (
Column,
TraceItemTableRequest,
TraceItemTableResponse,
)
from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemType
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
AttributeAggregation,
AttributeKey,
AttributeValue,
Function,
StrArray,
)
from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
AndFilter,
ComparisonFilter,
TraceItemFilter,
)

from sentry import features
from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import region_silo_endpoint
from sentry.api.bases.organization import OrganizationEndpoint, OrganizationPermission
from sentry.api.serializers import serialize
from sentry.api.utils import get_date_range_from_params
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.uptime.endpoints.utils import (
MAX_UPTIME_SUBSCRIPTION_IDS,
authorize_and_map_project_uptime_subscription_ids,
)
from sentry.uptime.types import IncidentStatus, UptimeSummary
from sentry.utils.snuba_rpc import table_rpc

logger = logging.getLogger(__name__)


@region_silo_endpoint
@extend_schema(tags=["Uptime Monitors"])
class OrganizationUptimeSummaryEndpoint(OrganizationEndpoint):
    """
    Organization endpoint returning aggregate uptime check counts, keyed by
    project uptime subscription id, for a requested date range.
    """

    publish_status = {
        "GET": ApiPublishStatus.EXPERIMENTAL,
    }
    owner = ApiOwner.CRONS
    permission_classes = (OrganizationPermission,)

    def get(self, request: Request, organization: Organization) -> Response:
        """
        Return a serialized UptimeSummary for each requested
        `projectUptimeSubscriptionId` query parameter.

        Responds with 400 when no ids are given, when more than
        MAX_UPTIME_SUBSCRIPTION_IDS are given, when the ids fail
        authorization, or when the EAP request or its formatting raises.
        """
        start, end = get_date_range_from_params(request.GET)
        projects = self.get_projects(request, organization, include_all_accessible=True)

        requested_ids = request.GET.getlist("projectUptimeSubscriptionId")
        if not requested_ids:
            return self.respond("No project uptime subscription ids provided", status=400)
        if len(requested_ids) > MAX_UPTIME_SUBSCRIPTION_IDS:
            return self.respond(
                f"Too many project uptime subscription ids provided. Maximum is {MAX_UPTIME_SUBSCRIPTION_IDS}",
                status=400,
            )

        use_eap_results = features.has(
            "organizations:uptime-eap-uptime-results-query", organization, actor=request.user
        )

        def format_subscription_id(sub_id: str) -> str:
            # XXX: The EAP uptime results are stored without dashes, so they
            # have to be queried using the hex form. The dashed form can be
            # dropped once the old uptime checks are removed.
            parsed = uuid.UUID(sub_id)
            return parsed.hex if use_eap_results else str(parsed)

        try:
            subscription_id_to_project_uptime_subscription_id, subscription_ids = (
                authorize_and_map_project_uptime_subscription_ids(
                    requested_ids, projects, format_subscription_id
                )
            )
        except ValueError:
            return self.respond("Invalid project uptime subscription ids provided", status=400)

        try:
            # The two storage flavours differ only in the trace item type and
            # in the name of the attribute that holds the subscription id.
            if use_eap_results:
                trace_item_type = TraceItemType.TRACE_ITEM_TYPE_UPTIME_RESULT
                subscription_key = "subscription_id"
            else:
                trace_item_type = TraceItemType.TRACE_ITEM_TYPE_UPTIME_CHECK
                subscription_key = "uptime_subscription_id"

            eap_response = self._make_eap_request(
                organization,
                projects,
                subscription_ids,
                start,
                end,
                trace_item_type,
                subscription_key,
            )
            formatted_response = self._format_response(eap_response)
        except Exception:
            logger.exception("Error making EAP RPC request for uptime check summary")
            return self.respond("error making request", status=400)

        # Results come back keyed by subscription id; callers asked for (and
        # expect) project uptime subscription ids.
        summaries_by_project_uptime_subscription_id = (
            self._map_response_to_project_uptime_subscription_ids(
                subscription_id_to_project_uptime_subscription_id, formatted_response
            )
        )

        serialized_response = {}
        for key, summary in summaries_by_project_uptime_subscription_id.items():
            serialized_response[key] = serialize(summary, request.user)

        return self.respond(serialized_response)

    def _make_eap_request(
        self,
        organization: Organization,
        projects: list[Project],
        subscription_ids: list[str],
        start: datetime,
        end: datetime,
        trace_item_type: TraceItemType.ValueType,
        subscription_key: str,
    ) -> TraceItemTableResponse:
        """
        Issue a single table RPC that groups items of `trace_item_type` by the
        `subscription_key` attribute, restricted to `subscription_ids` and the
        [start, end] range, and returns these columns per group:

          - uptime_subscription_id: the grouping attribute itself
          - total_checks: count over all items in the group
          - failed_checks: count of failure checks with NO_INCIDENT status
          - downtime_checks: count of failure checks with IN_INCIDENT status
          - missed_window_checks: count of missed-window checks
        """

        def to_timestamp(moment: datetime) -> Timestamp:
            stamp = Timestamp()
            stamp.FromDatetime(moment)
            return stamp

        def check_status_is(check_status: str) -> TraceItemFilter:
            return TraceItemFilter(
                comparison_filter=ComparisonFilter(
                    key=AttributeKey(name="check_status", type=AttributeKey.Type.TYPE_STRING),
                    op=ComparisonFilter.OP_EQUALS,
                    value=AttributeValue(val_str=check_status),
                )
            )

        def failure_with_incident_status(incident_status: IncidentStatus) -> TraceItemFilter:
            incident_status_is = TraceItemFilter(
                comparison_filter=ComparisonFilter(
                    key=AttributeKey(name="incident_status", type=AttributeKey.Type.TYPE_INT),
                    op=ComparisonFilter.OP_EQUALS,
                    value=AttributeValue(val_int=incident_status.value),
                )
            )
            return TraceItemFilter(
                and_filter=AndFilter(
                    filters=[check_status_is(CHECKSTATUS_FAILURE), incident_status_is]
                )
            )

        start_timestamp = to_timestamp(start)
        end_timestamp = to_timestamp(end)

        subscription_attribute_key = AttributeKey(
            name=subscription_key,
            type=AttributeKey.Type.TYPE_STRING,
        )

        def count_where(label: str, condition: TraceItemFilter) -> Column:
            return Column(
                label=label,
                conditional_aggregation=AttributeConditionalAggregation(
                    aggregate=Function.FUNCTION_COUNT,
                    key=subscription_attribute_key,
                    filter=condition,
                ),
            )

        columns: list[Column] = [
            # Always labelled "uptime_subscription_id", whichever attribute
            # name is used for the lookup, so the response can be read
            # uniformly.
            Column(label="uptime_subscription_id", key=subscription_attribute_key),
            Column(
                label="total_checks",
                aggregation=AttributeAggregation(
                    aggregate=Function.FUNCTION_COUNT,
                    key=subscription_attribute_key,
                    label="count()",
                ),
            ),
            count_where(
                "failed_checks",
                failure_with_incident_status(incident_status=IncidentStatus.NO_INCIDENT),
            ),
            count_where(
                "downtime_checks",
                failure_with_incident_status(incident_status=IncidentStatus.IN_INCIDENT),
            ),
            count_where("missed_window_checks", check_status_is(CHECKSTATUS_MISSED_WINDOW)),
        ]

        only_requested_subscriptions = TraceItemFilter(
            comparison_filter=ComparisonFilter(
                key=subscription_attribute_key,
                op=ComparisonFilter.OP_IN,
                value=AttributeValue(val_str_array=StrArray(values=subscription_ids)),
            )
        )

        meta = RequestMeta(
            organization_id=organization.id,
            project_ids=[project.id for project in projects],
            trace_item_type=trace_item_type,
            start_timestamp=start_timestamp,
            end_timestamp=end_timestamp,
            downsampled_storage_config=DownsampledStorageConfig(
                mode=DownsampledStorageConfig.MODE_HIGHEST_ACCURACY
            ),
        )
        request = TraceItemTableRequest(
            meta=meta,
            group_by=[subscription_attribute_key],
            filter=only_requested_subscriptions,
            columns=columns,
        )

        responses = table_rpc([request])
        assert len(responses) == 1
        return responses[0]

    def _format_response(self, response: TraceItemTableResponse) -> dict[str, UptimeSummary]:
        """
        Turn the column-oriented EAP table response into a dict from
        subscription id to UptimeSummary.

        A response without any columns yields an empty dict. The counts are
        read from `val_double` and converted with `int()`.
        """
        column_values = response.column_values
        if not column_values:
            return {}

        summaries: dict[str, UptimeSummary] = {}

        # Row N of the table is element N of every column's `results`; the
        # first column determines how many rows there are.
        for row_idx, _ in enumerate(column_values[0].results):
            row: dict[str, AttributeValue] = {
                column.attribute_name: column.results[row_idx] for column in column_values
            }

            summary = UptimeSummary(
                total_checks=int(row["total_checks"].val_double),
                failed_checks=int(row["failed_checks"].val_double),
                downtime_checks=int(row["downtime_checks"].val_double),
                missed_window_checks=int(row["missed_window_checks"].val_double),
            )
            summaries[row["uptime_subscription_id"].val_str] = summary

        return summaries

    def _map_response_to_project_uptime_subscription_ids(
        self,
        subscription_id_to_project_uptime_subscription_id: dict[str, int],
        formatted_response: dict[str, UptimeSummary],
    ) -> dict[int, UptimeSummary]:
        """
        Re-key the summaries from subscription id to project uptime
        subscription id.

        Raises KeyError if the response holds a subscription id that is
        missing from the mapping.
        """
        remapped: dict[int, UptimeSummary] = {}
        for subscription_id, summary in formatted_response.items():
            project_uptime_subscription_id = subscription_id_to_project_uptime_subscription_id[
                subscription_id
            ]
            remapped[project_uptime_subscription_id] = summary
        return remapped
Loading
Loading