Skip to content

Commit 91bdd10

Browse files
authored
fix(replays): Improve event parser to extract correct fields and handle known bad fields gracefully (#96467)
1 parent 7ed29b9 commit 91bdd10

File tree

3 files changed

+141
-46
lines changed

3 files changed

+141
-46
lines changed

src/sentry/replays/lib/summarize.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -341,6 +341,8 @@ def as_log_message(event: dict[str, Any]) -> str | None:
341341
return None
342342
case EventType.RESOURCE_SCRIPT:
343343
return None
344+
case EventType.CLS:
345+
return None
344346
except (KeyError, ValueError):
345347
logger.exception(
346348
"Error parsing event in replay AI summary",

src/sentry/replays/usecases/ingest/event_parser.py

Lines changed: 45 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,7 @@ class EventType(Enum):
110110
UI_BLUR = 18
111111
UI_FOCUS = 19
112112
UNKNOWN = 20
113+
CLS = 21
113114

114115

115116
def which(event: dict[str, Any]) -> EventType:
@@ -189,6 +190,8 @@ def which(event: dict[str, Any]) -> EventType:
189190
return EventType.LCP
190191
elif payload["description"] == "first-contentful-paint":
191192
return EventType.FCP
193+
elif payload["description"] == "cumulative-layout-shift":
194+
return EventType.CLS
192195
else:
193196
return EventType.UNKNOWN
194197
elif op == "memory":
@@ -311,6 +314,10 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
311314
case EventType.CLICK | EventType.DEAD_CLICK | EventType.RAGE_CLICK | EventType.SLOW_CLICK:
312315
payload = event["data"]["payload"]
313316

317+
# If the node wasn't provided we're forced to skip the event.
318+
if "node" not in payload["data"]:
319+
return None
320+
314321
node = payload["data"]["node"]
315322
node_attributes = node.get("attributes", {})
316323
click_attributes = {
@@ -352,10 +359,7 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
352359
payload = event["data"]["payload"]
353360
payload_data = payload["data"]
354361

355-
navigation_attributes = {
356-
"category": "navigation",
357-
"url": as_string_strict(event["data"]["payload"]["description"]),
358-
}
362+
navigation_attributes = {"category": "navigation"}
359363
if "from" in payload_data:
360364
navigation_attributes["from"] = as_string_strict(payload_data["from"])
361365
if "to" in payload_data:
@@ -373,25 +377,24 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
373377
case EventType.UI_FOCUS:
374378
return None
375379
case EventType.RESOURCE_FETCH | EventType.RESOURCE_XHR:
380+
payload = event["data"]["payload"]
381+
376382
resource_attributes = {
377383
"category": (
378384
"resource.xhr" if event_type == EventType.RESOURCE_XHR else "resource.fetch"
379385
),
380-
"url": as_string_strict(event["data"]["payload"]["description"]),
381-
"method": str(event["data"]["payload"]["data"]["method"]),
382-
"statusCode": int(event["data"]["payload"]["data"]["statusCode"]),
383-
"duration": float(event["data"]["payload"]["endTimestamp"])
384-
- float(event["data"]["payload"]["startTimestamp"]),
386+
"url": as_string_strict(payload["description"]),
387+
"method": str(payload["data"]["method"]),
388+
"duration": float(payload["endTimestamp"]) - float(payload["startTimestamp"]),
385389
}
386390

387-
for key, value in (
388-
event["data"]["payload"]["data"].get("request", {}).get("headers", {}).items()
389-
):
391+
if "statusCode" in payload["data"]:
392+
resource_attributes["statusCode"] = int(payload["data"]["statusCode"])
393+
394+
for key, value in payload["data"].get("request", {}).get("headers", {}).items():
390395
resource_attributes[f"request.headers.{key}"] = str(value)
391396

392-
for key, value in (
393-
event["data"]["payload"]["data"].get("response", {}).get("headers", {}).items()
394-
):
397+
for key, value in payload["data"].get("response", {}).get("headers", {}).items():
395398
resource_attributes[f"response.headers.{key}"] = str(value)
396399

397400
request_size, response_size = parse_network_content_lengths(event)
@@ -403,7 +406,7 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
403406
return {
404407
"attributes": resource_attributes,
405408
"event_hash": uuid.uuid4().bytes,
406-
"timestamp": float(event["data"]["payload"]["startTimestamp"]),
409+
"timestamp": float(payload["startTimestamp"]),
407410
}
408411
case EventType.RESOURCE_SCRIPT | EventType.RESOURCE_IMAGE:
409412
return {
@@ -424,17 +427,27 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
424427
"event_hash": uuid.uuid4().bytes,
425428
"timestamp": float(event["data"]["payload"]["startTimestamp"]),
426429
}
427-
case EventType.LCP | EventType.FCP:
430+
case EventType.LCP | EventType.FCP | EventType.CLS:
428431
payload = event["data"]["payload"]
432+
433+
if event_type == EventType.CLS:
434+
category = "web-vital.cls"
435+
elif event_type == EventType.FCP:
436+
category = "web-vital.fcp"
437+
else:
438+
category = "web-vital.lcp"
439+
429440
return {
430441
"attributes": {
431-
"category": "web-vital.fcp" if event_type == EventType.FCP else "web-vital.lcp",
442+
"category": category,
443+
"duration": float(event["data"]["payload"]["endTimestamp"])
444+
- float(event["data"]["payload"]["startTimestamp"]),
432445
"rating": as_string_strict(payload["data"]["rating"]),
433-
"size": int(payload["data"]["size"]),
434-
"value": int(payload["data"]["value"]),
446+
"size": float(payload["data"]["size"]),
447+
"value": float(payload["data"]["value"]),
435448
},
436449
"event_hash": uuid.uuid4().bytes,
437-
"timestamp": float(payload["timestamp"]),
450+
"timestamp": float(payload["startTimestamp"]),
438451
}
439452
case EventType.HYDRATION_ERROR:
440453
payload = event["data"]["payload"]
@@ -488,9 +501,9 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
488501
return {
489502
"attributes": {
490503
"category": "memory",
491-
"jsHeapSizeLimit": int(payload["data"]["jsHeapSizeLimit"]),
492-
"totalJSHeapSize": int(payload["data"]["totalJSHeapSize"]),
493-
"usedJSHeapSize": int(payload["data"]["usedJSHeapSize"]),
504+
"jsHeapSizeLimit": int(payload["data"]["memory"]["jsHeapSizeLimit"]),
505+
"totalJSHeapSize": int(payload["data"]["memory"]["totalJSHeapSize"]),
506+
"usedJSHeapSize": int(payload["data"]["memory"]["usedJSHeapSize"]),
494507
"endTimestamp": float(payload["endTimestamp"]),
495508
"duration": float(event["data"]["payload"]["endTimestamp"])
496509
- float(event["data"]["payload"]["startTimestamp"]),
@@ -577,13 +590,13 @@ def as_highlighted_event(
577590
return {"mutations": [MutationEvent(event["data"]["payload"])]}
578591
elif event_type == EventType.CLICK or event_type == EventType.SLOW_CLICK:
579592
click = parse_click_event(event["data"]["payload"], is_dead=False, is_rage=False)
580-
return {"clicks": [click]}
593+
return {"clicks": [click]} if click else {}
581594
elif event_type == EventType.DEAD_CLICK:
582595
click = parse_click_event(event["data"]["payload"], is_dead=True, is_rage=False)
583-
return {"clicks": [click]}
596+
return {"clicks": [click]} if click else {}
584597
elif event_type == EventType.RAGE_CLICK:
585598
click = parse_click_event(event["data"]["payload"], is_dead=True, is_rage=True)
586-
return {"clicks": [click]}
599+
return {"clicks": [click]} if click else {}
587600
elif event_type == EventType.RESOURCE_FETCH or event_type == EventType.RESOURCE_XHR:
588601
lengths = parse_network_content_lengths(event)
589602
if lengths != (None, None):
@@ -626,10 +639,11 @@ def _get_response_size(data: dict[str, Any]) -> int:
626639
return request_size, response_size
627640

628641

629-
def parse_click_event(payload: dict[str, Any], is_dead: bool, is_rage: bool) -> ClickEvent:
630-
node = payload["data"]["node"]
631-
assert node is not None
632-
assert node["id"] >= 0
642+
def parse_click_event(payload: dict[str, Any], is_dead: bool, is_rage: bool) -> ClickEvent | None:
643+
node = payload["data"].get("node")
644+
645+
if not isinstance(node, dict) or node.get("id", -1) < 0:
646+
return None
633647

634648
attributes = node.get("attributes", {})
635649

tests/sentry/replays/unit/test_event_parser.py

Lines changed: 94 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -313,6 +313,27 @@ def test_parse_highlighted_events_click_events():
313313
assert user_actions.click_events[0].timestamp == 1674298825
314314

315315

316+
def test_parse_highlighted_events_click_events_missing_node():
317+
event = {
318+
"type": 5,
319+
"timestamp": 1674298825,
320+
"data": {
321+
"tag": "breadcrumb",
322+
"payload": {
323+
"timestamp": 1674298825.403,
324+
"type": "default",
325+
"category": "ui.click",
326+
"message": "div#hello.hello.world",
327+
"data": {"nodeId": 1},
328+
},
329+
},
330+
}
331+
332+
builder = HighlightedEventsBuilder()
333+
builder.add(which(event), event, sampled=False)
334+
assert len(builder.result.click_events) == 0
335+
336+
316337
def test_parse_highlighted_events_click_event_str_payload():
317338
event = {"type": 5, "data": {"tag": "breadcrumb", "payload": "hello world"}}
318339
builder = HighlightedEventsBuilder()
@@ -811,6 +832,22 @@ def test_as_trace_item_context_click_event():
811832
assert "event_hash" in result and len(result["event_hash"]) == 16
812833

813834

835+
def test_as_trace_item_context_click_event_missing_node():
836+
event = {
837+
"data": {
838+
"payload": {
839+
"timestamp": 1674298825.403,
840+
"message": "div#hello.hello.world",
841+
"data": {},
842+
"url": "https://example.com/form",
843+
}
844+
}
845+
}
846+
847+
result = as_trace_item_context(EventType.CLICK, event)
848+
assert result is None
849+
850+
814851
def test_as_trace_item_context_dead_click_event():
815852
event = {
816853
"data": {
@@ -866,7 +903,6 @@ def test_as_trace_item_context_navigation_event():
866903
"data": {
867904
"payload": {
868905
"timestamp": 1674298825.0,
869-
"description": "https://sentry.io/",
870906
"data": {"from": "/old-page", "to": "/new-page"},
871907
}
872908
}
@@ -886,7 +922,6 @@ def test_as_trace_item_context_navigation_event_missing_optional_fields():
886922
"data": {
887923
"payload": {
888924
"timestamp": 1674298825.0,
889-
"description": "https://sentry.io/",
890925
"data": {},
891926
}
892927
}
@@ -1034,7 +1069,8 @@ def test_as_trace_item_context_lcp_event():
10341069
event = {
10351070
"data": {
10361071
"payload": {
1037-
"timestamp": 1674298825.0,
1072+
"startTimestamp": 1674298825.0,
1073+
"endTimestamp": 1674298825.0,
10381074
"data": {"rating": "good", "size": 1024, "value": 1500},
10391075
}
10401076
}
@@ -1044,6 +1080,7 @@ def test_as_trace_item_context_lcp_event():
10441080
assert result is not None
10451081
assert result["timestamp"] == 1674298825.0
10461082
assert result["attributes"]["category"] == "web-vital.lcp"
1083+
assert result["attributes"]["duration"] == 0
10471084
assert result["attributes"]["rating"] == "good"
10481085
assert result["attributes"]["size"] == 1024
10491086
assert result["attributes"]["value"] == 1500
@@ -1054,7 +1091,8 @@ def test_as_trace_item_context_fcp_event():
10541091
event = {
10551092
"data": {
10561093
"payload": {
1057-
"timestamp": 1674298825.0,
1094+
"startTimestamp": 1674298825.0,
1095+
"endTimestamp": 1674298825.0,
10581096
"data": {"rating": "needs-improvement", "size": 512, "value": 2000},
10591097
}
10601098
}
@@ -1063,12 +1101,46 @@ def test_as_trace_item_context_fcp_event():
10631101
result = as_trace_item_context(EventType.FCP, event)
10641102
assert result is not None
10651103
assert result["attributes"]["category"] == "web-vital.fcp"
1104+
assert result["attributes"]["duration"] == 0
10661105
assert result["attributes"]["rating"] == "needs-improvement"
10671106
assert result["attributes"]["size"] == 512
10681107
assert result["attributes"]["value"] == 2000
10691108
assert "event_hash" in result and len(result["event_hash"]) == 16
10701109

10711110

1111+
def test_as_trace_item_context_cls_event():
1112+
event = {
1113+
"type": 5,
1114+
"timestamp": 1753467516.4146557,
1115+
"data": {
1116+
"tag": "performanceSpan",
1117+
"payload": {
1118+
"op": "web-vital",
1119+
"description": "cumulative-layout-shift",
1120+
"startTimestamp": 1753467516.4146557,
1121+
"endTimestamp": 1753467516.4146557,
1122+
"data": {
1123+
"value": 0.6558277147341711,
1124+
"size": 0.6558277147341711,
1125+
"rating": "poor",
1126+
"nodeIds": [1239, 1072, 1244, 1243, 891],
1127+
"attributions": [
1128+
{"value": 0.6558277147341711, "nodeIds": [1239, 1072, 1244, 1243, 891]}
1129+
],
1130+
},
1131+
},
1132+
},
1133+
}
1134+
result = as_trace_item_context(EventType.CLS, event)
1135+
assert result is not None
1136+
assert result["attributes"]["category"] == "web-vital.cls"
1137+
assert result["attributes"]["duration"] == 0
1138+
assert result["attributes"]["rating"] == "poor"
1139+
assert result["attributes"]["size"] == 0.6558277147341711
1140+
assert result["attributes"]["value"] == 0.6558277147341711
1141+
assert "event_hash" in result and len(result["event_hash"]) == 16
1142+
1143+
10721144
def test_as_trace_item_context_hydration_error():
10731145
event = {
10741146
"data": {
@@ -1137,27 +1209,34 @@ def test_as_trace_item_context_options():
11371209

11381210
def test_as_trace_item_context_memory():
11391211
event = {
1212+
"type": 5,
1213+
"timestamp": 1753467523.594,
11401214
"data": {
1215+
"tag": "performanceSpan",
11411216
"payload": {
1142-
"startTimestamp": 1674298825.0,
1143-
"endTimestamp": 1674298826.5,
1217+
"op": "memory",
1218+
"description": "memory",
1219+
"startTimestamp": 1753467523.594,
1220+
"endTimestamp": 1753467523.594,
11441221
"data": {
1145-
"jsHeapSizeLimit": 4294705152,
1146-
"totalJSHeapSize": 50331648,
1147-
"usedJSHeapSize": 30000000,
1222+
"memory": {
1223+
"jsHeapSizeLimit": 4294705152,
1224+
"totalJSHeapSize": 111507602,
1225+
"usedJSHeapSize": 69487254,
1226+
}
11481227
},
1149-
}
1150-
}
1228+
},
1229+
},
11511230
}
11521231

11531232
result = as_trace_item_context(EventType.MEMORY, event)
11541233
assert result is not None
1155-
assert result["timestamp"] == 1674298825.0
1234+
assert result["timestamp"] == 1753467523.594
11561235
assert result["attributes"]["category"] == "memory"
11571236
assert result["attributes"]["jsHeapSizeLimit"] == 4294705152
1158-
assert result["attributes"]["totalJSHeapSize"] == 50331648
1159-
assert result["attributes"]["usedJSHeapSize"] == 30000000
1160-
assert result["attributes"]["endTimestamp"] == 1674298826.5
1237+
assert result["attributes"]["totalJSHeapSize"] == 111507602
1238+
assert result["attributes"]["usedJSHeapSize"] == 69487254
1239+
assert result["attributes"]["endTimestamp"] == 1753467523.594
11611240
assert "event_hash" in result and len(result["event_hash"]) == 16
11621241

11631242

0 commit comments

Comments
 (0)