Skip to content

Commit 5778c7a

Browse files
authored
chore(aci): Track overdue delayed event processing (#96718)
For better visibility into processing latency across tasks, track the distribution of particularly high lag in event scheduling timestamps.
1 parent f4ba853 commit 5778c7a

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

src/sentry/workflow_engine/processors/delayed_workflow.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,12 +402,23 @@ def get_condition_query_groups(
402402
Map unique condition queries to the group IDs that need to be checked for that query.
403403
"""
404404
condition_groups: dict[UniqueConditionQuery, GroupQueryParams] = defaultdict(GroupQueryParams)
405-
405+
now = timezone.now()
406406
for dcg in data_condition_groups:
407407
slow_conditions = dcg_to_slow_conditions[dcg.id]
408408
workflow_id = event_data.dcg_to_workflow.get(dcg.id)
409409
workflow_env = workflows_to_envs[workflow_id] if workflow_id else None
410410
timestamp = event_data.dcg_to_timestamp[dcg.id]
411+
if timestamp is not None:
412+
delay = now - timestamp
413+
# If it's been more than 1.5 minutes, we're taking too long to process the event and
414+
# want to know how bad it is. It's a biased sample, but lets us see if we're somewhat
415+
# over or very over.
416+
if delay.total_seconds() > 90:
417+
metrics.timing(
418+
"workflow_engine.overdue_event_lag",
419+
delay.total_seconds(),
420+
sample_rate=1.0,
421+
)
411422
for condition in slow_conditions:
412423
for condition_query in generate_unique_queries(condition, workflow_env):
413424
condition_groups[condition_query].update(

0 commit comments

Comments
 (0)