-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
feat(release health): Update snuba metrics to know about session unhandled state #96707
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 9 commits
c492a3e
03b1528
4b5db16
6b5630f
a1ea717
91a471e
51b7c88
2671115
a0d95ea
2191528
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -720,7 +720,7 @@ def _get_errored_sessions_for_overview( | |
end: datetime, | ||
) -> Mapping[tuple[int, str], int]: | ||
""" | ||
Count of errored sessions, incl fatal (abnormal, crashed) sessions, | ||
Count of errored sessions, incl fatal (abnormal, unhandled, crashed) session | ||
excl errored *preaggregated* sessions | ||
""" | ||
project_ids = [p.id for p in projects] | ||
|
@@ -774,12 +774,13 @@ def _get_session_by_status_for_overview( | |
end: datetime, | ||
) -> Mapping[tuple[int, str, str], int]: | ||
""" | ||
Counts of init, abnormal and crashed sessions, purpose-built for overview | ||
Counts of init, abnormal, unhandled and crashed sessions, purpose-built for overview | ||
""" | ||
project_ids = [p.id for p in projects] | ||
|
||
select = [ | ||
MetricField(metric_mri=SessionMRI.ABNORMAL.value, alias="abnormal", op=None), | ||
MetricField(metric_mri=SessionMRI.UNHANDLED.value, alias="unhandled", op=None), | ||
MetricField(metric_mri=SessionMRI.CRASHED.value, alias="crashed", op=None), | ||
MetricField(metric_mri=SessionMRI.ALL.value, alias="init", op=None), | ||
MetricField( | ||
|
@@ -820,7 +821,13 @@ def _get_session_by_status_for_overview( | |
release = by.get("release") | ||
|
||
totals = group.get("totals", {}) | ||
for status in ["abnormal", "crashed", "init", "errored_preaggr"]: | ||
for status in [ | ||
"abnormal", | ||
"unhandled", | ||
"crashed", | ||
"init", | ||
"errored_preaggr", | ||
]: | ||
value = totals.get(status) | ||
if value is not None and value != 0.0: | ||
ret_val[(proj_id, release, status)] = value | ||
|
@@ -1037,8 +1044,10 @@ def get_release_health_data_overview( | |
if not has_health_data and summary_stats_period != "90d": | ||
fetch_has_health_data_releases.add((project_id, release)) | ||
|
||
sessions_unhandled = rv_sessions.get((project_id, release, "unhandled"), 0) | ||
sessions_crashed = rv_sessions.get((project_id, release, "crashed"), 0) | ||
|
||
users_unhandled = rv_users.get((project_id, release, "unhandled_users"), 0) | ||
users_crashed = rv_users.get((project_id, release, "crashed_users"), 0) | ||
|
||
rv_row = rv[project_id, release] = { | ||
|
@@ -1050,19 +1059,38 @@ def get_release_health_data_overview( | |
"total_project_sessions_24h": adoption_info.get("project_sessions_24h"), | ||
"total_sessions": total_sessions, | ||
"total_users": total_users, | ||
"has_health_data": has_health_data, | ||
"sessions_crashed": sessions_crashed, | ||
# Users where the error was `unhandled`; possibly resulting in a crash | ||
"unhandled_users": ( | ||
100 - (users_unhandled + users_crashed) / total_users * 100 | ||
if total_users | ||
else None | ||
), | ||
# Users where the error was not a crash (but may have been unhandled) | ||
"crash_free_users": ( | ||
100 - users_crashed / total_users * 100 if total_users else None | ||
), | ||
"has_health_data": has_health_data, | ||
# Sessions where the error was specifically `unhandled`; NOT resulting in a crash | ||
"sessions_unhandled": sessions_unhandled, | ||
# Sessions where the error was a crash | ||
"sessions_crashed": sessions_crashed, | ||
# Sessions where the error was `unhandled`; possibly resulting in a crash | ||
"unhandled_sessions": ( | ||
100 - (sessions_unhandled + sessions_crashed) / float(total_sessions) * 100 | ||
if total_sessions | ||
else None | ||
), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bug: Error Metrics Calculation InversionThe Locations (1) |
||
# Sessions where the error was not a crash (but may have been unhandled) | ||
"crash_free_sessions": ( | ||
100 - sessions_crashed / float(total_sessions) * 100 if total_sessions else None | ||
), | ||
# Sessions where the error was handled | ||
"sessions_errored": max( | ||
0, | ||
rv_errored_sessions.get((project_id, release), 0) | ||
+ rv_sessions.get((project_id, release, "errored_preaggr"), 0) | ||
- sessions_crashed | ||
- sessions_unhandled | ||
- rv_sessions.get((project_id, release, "abnormal"), 0), | ||
), | ||
"duration_p50": None, | ||
|
@@ -1427,6 +1455,9 @@ def get_project_release_stats( | |
MetricField( | ||
metric_mri=SessionMRI.CRASHED_USER.value, alias="users_crashed", op=None | ||
), | ||
MetricField( | ||
metric_mri=SessionMRI.UNHANDLED_USER.value, alias="users_unhandled", op=None | ||
), | ||
MetricField( | ||
metric_mri=SessionMRI.ERRORED_USER.value, alias="users_errored", op=None | ||
), | ||
|
@@ -1440,6 +1471,11 @@ def get_project_release_stats( | |
MetricField( | ||
metric_mri=SessionMRI.ABNORMAL.value, alias="sessions_abnormal", op=None | ||
), | ||
MetricField( | ||
metric_mri=SessionMRI.UNHANDLED.value, | ||
alias="sessions_unhandled", | ||
op=None, | ||
), | ||
MetricField(metric_mri=SessionMRI.CRASHED.value, alias="sessions_crashed", op=None), | ||
MetricField(metric_mri=SessionMRI.ERRORED.value, alias="sessions_errored", op=None), | ||
MetricField(metric_mri=SessionMRI.HEALTHY.value, alias="sessions_healthy", op=None), | ||
|
@@ -1500,6 +1536,7 @@ def get_project_release_stats( | |
f"{stat}": 0, | ||
f"{stat}_abnormal": 0, | ||
f"{stat}_crashed": 0, | ||
f"{stat}_unhandled": 0, | ||
f"{stat}_errored": 0, | ||
f"{stat}_healthy": 0, | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,20 +72,32 @@ class SessionMRI(Enum): | |
ERRORED_PREAGGREGATED = "e:sessions/error.preaggr@none" | ||
ERRORED_SET = "e:sessions/error.unique@none" | ||
ERRORED_ALL = "e:sessions/all_errored@none" | ||
HANDLED = "e:sessions/handled.unique@none" # all sessions excluding handled and crashed | ||
UNHANDLED = "e:sessions/unhandled@none" # unhandled, does not include crashed | ||
CRASHED_AND_ABNORMAL = "e:sessions/crashed_abnormal@none" | ||
CRASHED = "e:sessions/crashed@none" | ||
CRASH_FREE = "e:sessions/crash_free@none" | ||
ABNORMAL = "e:sessions/abnormal@none" | ||
HANDLED_RATE = "e:sessions/handled_rate@ratio" # all sessions excluding handled and crashed | ||
UNHANDLED_RATE = "e:sessions/unhandled_rate@ratio" # unhandled, does not include crashed | ||
CRASH_RATE = "e:sessions/crash_rate@ratio" | ||
CRASH_FREE_RATE = "e:sessions/crash_free_rate@ratio" | ||
CRASH_FREE_RATE = "e:sessions/crash_free_rate@ratio" # includes handled and unhandled | ||
ALL_USER = "e:sessions/user.all@none" | ||
HEALTHY_USER = "e:sessions/user.healthy@none" | ||
ERRORED_USER = "e:sessions/user.errored@none" | ||
ERRORED_USER_ALL = "e:sessions/user.all_errored@none" | ||
HANDLED_USER = "e:sessions/user.handled@none" # all sessions excluding handled and crashed | ||
UNHANDLED_USER = "e:sessions/user.unhandled@none" # unhandled, does not include crashed | ||
CRASHED_AND_ABNORMAL_USER = "e:sessions/user.crashed_abnormal@none" | ||
CRASHED_USER = "e:sessions/user.crashed@none" | ||
CRASH_FREE_USER = "e:sessions/user.crash_free@none" | ||
ABNORMAL_USER = "e:sessions/user.abnormal@none" | ||
HANDLED_USER_RATE = ( | ||
"e:sessions/user.handled_rate@ratio" # all sessions excluding handled and crashed | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bug: Incorrect Comments for Handled MetricsThe comments for Locations (2) |
||
UNHANDLED_USER_RATE = ( | ||
"e:sessions/user.unhandled_rate@ratio" # unhandled, does not include crashed | ||
) | ||
CRASH_USER_RATE = "e:sessions/user.crash_rate@ratio" | ||
CRASH_FREE_USER_RATE = "e:sessions/user.crash_free_rate@ratio" | ||
ANR_USER = "e:sessions/user.anr@none" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug: Metric Mismatch: Unhandled Users Not Tracked
The
unhandled_users
metric is always 0 because the_get_users_and_crashed_users_for_overview
function, which populates therv_users
dictionary, does not query for "unhandled_users" data. It only fetches "all_users" and "crashed_users". Consequently, theusers_unhandled
value defaults to 0, making the calculatedunhandled_users
percentage incorrect and effectively identical tocrash_free_users
.Locations (1)
src/sentry/release_health/metrics.py#L1049-L1067