fix(replays): Improve event parser to extract correct fields and handle known bad fields gracefully #96467

Merged · 5 commits · Jul 28, 2025

2 changes: 2 additions & 0 deletions src/sentry/replays/lib/summarize.py
@@ -341,6 +341,8 @@ def as_log_message(event: dict[str, Any]) -> str | None:
return None
case EventType.RESOURCE_SCRIPT:
return None
case EventType.CLS:
return None
except (KeyError, ValueError):
logger.exception(
"Error parsing event in replay AI summary",
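Note: the new `EventType.CLS` arm deliberately produces no summary log line. A minimal sketch of the expected behavior — assuming `as_log_message` classifies events via `which()` as the surrounding match arms suggest, and using a trimmed version of the CLS fixture added to the tests below:

```python
# Sketch only — not part of the PR. Exercises the new EventType.CLS branch.
from sentry.replays.lib.summarize import as_log_message

cls_event = {
    "type": 5,
    "timestamp": 1753467516.4146557,
    "data": {
        "tag": "performanceSpan",
        "payload": {
            "op": "web-vital",
            "description": "cumulative-layout-shift",
            "startTimestamp": 1753467516.4146557,
            "endTimestamp": 1753467516.4146557,
            "data": {"value": 0.65, "size": 0.65, "rating": "poor"},
        },
    },
}

# CLS spans carry no useful log text for the AI summary, so the new case
# returns None rather than falling through as an unhandled event type.
assert as_log_message(cls_event) is None
```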
76 changes: 45 additions & 31 deletions src/sentry/replays/usecases/ingest/event_parser.py
@@ -110,6 +110,7 @@ class EventType(Enum):
UI_BLUR = 18
UI_FOCUS = 19
UNKNOWN = 20
CLS = 21


def which(event: dict[str, Any]) -> EventType:
@@ -189,6 +190,8 @@ def which(event: dict[str, Any]) -> EventType:
return EventType.LCP
elif payload["description"] == "first-contentful-paint":
return EventType.FCP
elif payload["description"] == "cumulative-layout-shift":
return EventType.CLS
else:
return EventType.UNKNOWN
elif op == "memory":
@@ -311,6 +314,10 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
case EventType.CLICK | EventType.DEAD_CLICK | EventType.RAGE_CLICK | EventType.SLOW_CLICK:
payload = event["data"]["payload"]

# If the node wasn't provided we're forced to skip the event.
if "node" not in payload["data"]:
return None

node = payload["data"]["node"]
node_attributes = node.get("attributes", {})
click_attributes = {
@@ -352,10 +359,7 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
payload = event["data"]["payload"]
payload_data = payload["data"]

navigation_attributes = {
"category": "navigation",
"url": as_string_strict(event["data"]["payload"]["description"]),
}
navigation_attributes = {"category": "navigation"}
if "from" in payload_data:
navigation_attributes["from"] = as_string_strict(payload_data["from"])
if "to" in payload_data:
@@ -373,25 +377,24 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
case EventType.UI_FOCUS:
return None
case EventType.RESOURCE_FETCH | EventType.RESOURCE_XHR:
payload = event["data"]["payload"]

resource_attributes = {
"category": (
"resource.xhr" if event_type == EventType.RESOURCE_XHR else "resource.fetch"
),
"url": as_string_strict(event["data"]["payload"]["description"]),
"method": str(event["data"]["payload"]["data"]["method"]),
"statusCode": int(event["data"]["payload"]["data"]["statusCode"]),
"duration": float(event["data"]["payload"]["endTimestamp"])
- float(event["data"]["payload"]["startTimestamp"]),
"url": as_string_strict(payload["description"]),
"method": str(payload["data"]["method"]),
"duration": float(payload["endTimestamp"]) - float(payload["startTimestamp"]),
}

for key, value in (
event["data"]["payload"]["data"].get("request", {}).get("headers", {}).items()
):
if "statusCode" in payload["data"]:
resource_attributes["statusCode"] = int(payload["data"]["statusCode"])

for key, value in payload["data"].get("request", {}).get("headers", {}).items():
resource_attributes[f"request.headers.{key}"] = str(value)

for key, value in (
event["data"]["payload"]["data"].get("response", {}).get("headers", {}).items()
):
for key, value in payload["data"].get("response", {}).get("headers", {}).items():
resource_attributes[f"response.headers.{key}"] = str(value)

request_size, response_size = parse_network_content_lengths(event)
@@ -403,7 +406,7 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
return {
"attributes": resource_attributes,
"event_hash": uuid.uuid4().bytes,
"timestamp": float(event["data"]["payload"]["startTimestamp"]),
"timestamp": float(payload["startTimestamp"]),
}
case EventType.RESOURCE_SCRIPT | EventType.RESOURCE_IMAGE:
return {
@@ -424,17 +427,27 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
"event_hash": uuid.uuid4().bytes,
"timestamp": float(event["data"]["payload"]["startTimestamp"]),
}
case EventType.LCP | EventType.FCP:
case EventType.LCP | EventType.FCP | EventType.CLS:
payload = event["data"]["payload"]

if event_type == EventType.CLS:
category = "web-vital.cls"
elif event_type == EventType.FCP:
category = "web-vital.fcp"
else:
category = "web-vital.lcp"

return {
"attributes": {
"category": "web-vital.fcp" if event_type == EventType.FCP else "web-vital.lcp",
"category": category,
"duration": float(event["data"]["payload"]["endTimestamp"])
- float(event["data"]["payload"]["startTimestamp"]),
"rating": as_string_strict(payload["data"]["rating"]),
"size": int(payload["data"]["size"]),
"value": int(payload["data"]["value"]),
"size": float(payload["data"]["size"]),
"value": float(payload["data"]["value"]),
},
"event_hash": uuid.uuid4().bytes,
"timestamp": float(payload["timestamp"]),
"timestamp": float(payload["startTimestamp"]),
}
case EventType.HYDRATION_ERROR:
payload = event["data"]["payload"]
@@ -488,9 +501,9 @@ def as_trace_item_context(event_type: EventType, event: dict[str, Any]) -> Trace
return {
"attributes": {
"category": "memory",
"jsHeapSizeLimit": int(payload["data"]["jsHeapSizeLimit"]),
"totalJSHeapSize": int(payload["data"]["totalJSHeapSize"]),
"usedJSHeapSize": int(payload["data"]["usedJSHeapSize"]),
"jsHeapSizeLimit": int(payload["data"]["memory"]["jsHeapSizeLimit"]),
"totalJSHeapSize": int(payload["data"]["memory"]["totalJSHeapSize"]),
"usedJSHeapSize": int(payload["data"]["memory"]["usedJSHeapSize"]),
"endTimestamp": float(payload["endTimestamp"]),
"duration": float(event["data"]["payload"]["endTimestamp"])
- float(event["data"]["payload"]["startTimestamp"]),
@@ -577,13 +590,13 @@ def as_highlighted_event(
return {"mutations": [MutationEvent(event["data"]["payload"])]}
elif event_type == EventType.CLICK or event_type == EventType.SLOW_CLICK:
click = parse_click_event(event["data"]["payload"], is_dead=False, is_rage=False)
return {"clicks": [click]}
return {"clicks": [click]} if click else {}
elif event_type == EventType.DEAD_CLICK:
click = parse_click_event(event["data"]["payload"], is_dead=True, is_rage=False)
return {"clicks": [click]}
return {"clicks": [click]} if click else {}
elif event_type == EventType.RAGE_CLICK:
click = parse_click_event(event["data"]["payload"], is_dead=True, is_rage=True)
return {"clicks": [click]}
return {"clicks": [click]} if click else {}
elif event_type == EventType.RESOURCE_FETCH or event_type == EventType.RESOURCE_XHR:
lengths = parse_network_content_lengths(event)
if lengths != (None, None):
@@ -626,10 +639,11 @@ def _get_response_size(data: dict[str, Any]) -> int:
return request_size, response_size


def parse_click_event(payload: dict[str, Any], is_dead: bool, is_rage: bool) -> ClickEvent:
node = payload["data"]["node"]
assert node is not None
assert node["id"] >= 0
def parse_click_event(payload: dict[str, Any], is_dead: bool, is_rage: bool) -> ClickEvent | None:
node = payload["data"].get("node")

if not isinstance(node, dict) or node.get("id", -1) < 0:
return None

attributes = node.get("attributes", {})

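Note: with `parse_click_event` now returning `None` instead of asserting, both ingestion paths drop malformed click payloads instead of erroring. A rough sketch of the new contract (illustrative only; import path inferred from the file path above):

```python
# Sketch only — not part of the PR. Illustrates the new "bad node" contract.
from sentry.replays.usecases.ingest.event_parser import (
    EventType,
    as_trace_item_context,
    parse_click_event,
)

# A click payload with no "node": previously payload["data"]["node"] raised
# KeyError; the click is now dropped.
no_node = {"timestamp": 1674298825.403, "message": "div#hello", "data": {"nodeId": 1}}
assert parse_click_event(no_node, is_dead=False, is_rage=False) is None

# A node with a negative id is rejected the same way (the old asserts were
# replaced by an early return).
bad_id = {"timestamp": 1674298825.403, "message": "div#hello", "data": {"node": {"id": -1}}}
assert parse_click_event(bad_id, is_dead=False, is_rage=False) is None

# The trace-item path mirrors this: a click event without a node converts to None.
event = {"data": {"payload": no_node}}
assert as_trace_item_context(EventType.CLICK, event) is None
```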
109 changes: 94 additions & 15 deletions tests/sentry/replays/unit/test_event_parser.py
@@ -313,6 +313,27 @@ def test_parse_highlighted_events_click_events():
assert user_actions.click_events[0].timestamp == 1674298825


def test_parse_highlighted_events_click_events_missing_node():
event = {
"type": 5,
"timestamp": 1674298825,
"data": {
"tag": "breadcrumb",
"payload": {
"timestamp": 1674298825.403,
"type": "default",
"category": "ui.click",
"message": "div#hello.hello.world",
"data": {"nodeId": 1},
},
},
}

builder = HighlightedEventsBuilder()
builder.add(which(event), event, sampled=False)
assert len(builder.result.click_events) == 0


def test_parse_highlighted_events_click_event_str_payload():
event = {"type": 5, "data": {"tag": "breadcrumb", "payload": "hello world"}}
builder = HighlightedEventsBuilder()
@@ -811,6 +832,22 @@ def test_as_trace_item_context_click_event():
assert "event_hash" in result and len(result["event_hash"]) == 16


def test_as_trace_item_context_click_event_missing_node():
event = {
"data": {
"payload": {
"timestamp": 1674298825.403,
"message": "div#hello.hello.world",
"data": {},
"url": "https://example.com/form",
}
}
}

result = as_trace_item_context(EventType.CLICK, event)
assert result is None


def test_as_trace_item_context_dead_click_event():
event = {
"data": {
@@ -866,7 +903,6 @@ def test_as_trace_item_context_navigation_event():
"data": {
"payload": {
"timestamp": 1674298825.0,
"description": "https://sentry.io/",
"data": {"from": "/old-page", "to": "/new-page"},
}
}
@@ -886,7 +922,6 @@ def test_as_trace_item_context_navigation_event_missing_optional_fields():
"data": {
"payload": {
"timestamp": 1674298825.0,
"description": "https://sentry.io/",
"data": {},
}
}
@@ -1034,7 +1069,8 @@ def test_as_trace_item_context_lcp_event():
event = {
"data": {
"payload": {
"timestamp": 1674298825.0,
"startTimestamp": 1674298825.0,
"endTimestamp": 1674298825.0,
"data": {"rating": "good", "size": 1024, "value": 1500},
}
}
@@ -1044,6 +1080,7 @@ def test_as_trace_item_context_lcp_event():
assert result is not None
assert result["timestamp"] == 1674298825.0
assert result["attributes"]["category"] == "web-vital.lcp"
assert result["attributes"]["duration"] == 0
assert result["attributes"]["rating"] == "good"
assert result["attributes"]["size"] == 1024
assert result["attributes"]["value"] == 1500
@@ -1054,7 +1091,8 @@ def test_as_trace_item_context_fcp_event():
event = {
"data": {
"payload": {
"timestamp": 1674298825.0,
"startTimestamp": 1674298825.0,
"endTimestamp": 1674298825.0,
"data": {"rating": "needs-improvement", "size": 512, "value": 2000},
}
}
@@ -1063,12 +1101,46 @@ def test_as_trace_item_context_fcp_event():
result = as_trace_item_context(EventType.FCP, event)
assert result is not None
assert result["attributes"]["category"] == "web-vital.fcp"
assert result["attributes"]["duration"] == 0
assert result["attributes"]["rating"] == "needs-improvement"
assert result["attributes"]["size"] == 512
assert result["attributes"]["value"] == 2000
assert "event_hash" in result and len(result["event_hash"]) == 16


def test_as_trace_item_context_cls_event():
event = {
"type": 5,
"timestamp": 1753467516.4146557,
"data": {
"tag": "performanceSpan",
"payload": {
"op": "web-vital",
"description": "cumulative-layout-shift",
"startTimestamp": 1753467516.4146557,
"endTimestamp": 1753467516.4146557,
"data": {
"value": 0.6558277147341711,
"size": 0.6558277147341711,
"rating": "poor",
"nodeIds": [1239, 1072, 1244, 1243, 891],
"attributions": [
{"value": 0.6558277147341711, "nodeIds": [1239, 1072, 1244, 1243, 891]}
],
},
},
},
}
result = as_trace_item_context(EventType.CLS, event)
assert result is not None
assert result["attributes"]["category"] == "web-vital.cls"
assert result["attributes"]["duration"] == 0
assert result["attributes"]["rating"] == "poor"
assert result["attributes"]["size"] == 0.6558277147341711
assert result["attributes"]["value"] == 0.6558277147341711
assert "event_hash" in result and len(result["event_hash"]) == 16


def test_as_trace_item_context_hydration_error():
event = {
"data": {
@@ -1137,27 +1209,34 @@ def test_as_trace_item_context_options():

def test_as_trace_item_context_memory():
event = {
"type": 5,
"timestamp": 1753467523.594,
"data": {
"tag": "performanceSpan",
"payload": {
"startTimestamp": 1674298825.0,
"endTimestamp": 1674298826.5,
"op": "memory",
"description": "memory",
"startTimestamp": 1753467523.594,
"endTimestamp": 1753467523.594,
"data": {
"jsHeapSizeLimit": 4294705152,
"totalJSHeapSize": 50331648,
"usedJSHeapSize": 30000000,
"memory": {
"jsHeapSizeLimit": 4294705152,
"totalJSHeapSize": 111507602,
"usedJSHeapSize": 69487254,
}
},
}
}
},
},
}

result = as_trace_item_context(EventType.MEMORY, event)
assert result is not None
assert result["timestamp"] == 1674298825.0
assert result["timestamp"] == 1753467523.594
assert result["attributes"]["category"] == "memory"
assert result["attributes"]["jsHeapSizeLimit"] == 4294705152
assert result["attributes"]["totalJSHeapSize"] == 50331648
assert result["attributes"]["usedJSHeapSize"] == 30000000
assert result["attributes"]["endTimestamp"] == 1674298826.5
assert result["attributes"]["totalJSHeapSize"] == 111507602
assert result["attributes"]["usedJSHeapSize"] == 69487254
assert result["attributes"]["endTimestamp"] == 1753467523.594
assert "event_hash" in result and len(result["event_hash"]) == 16


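For reference, a sketch of the full CLS round trip exercised by the new test above — assuming `which()` and `as_trace_item_context` are imported from the parser module changed in this PR:

```python
# Sketch only: the CLS round trip covered by test_as_trace_item_context_cls_event.
from sentry.replays.usecases.ingest.event_parser import as_trace_item_context, which

cls_event = {
    "type": 5,
    "timestamp": 1753467516.4146557,
    "data": {
        "tag": "performanceSpan",
        "payload": {
            "op": "web-vital",
            "description": "cumulative-layout-shift",
            "startTimestamp": 1753467516.4146557,
            "endTimestamp": 1753467516.4146557,
            "data": {"value": 0.6558277147341711, "size": 0.6558277147341711, "rating": "poor"},
        },
    },
}

event_type = which(cls_event)  # "cumulative-layout-shift" now maps to EventType.CLS
context = as_trace_item_context(event_type, cls_event)

assert context is not None
assert context["attributes"]["category"] == "web-vital.cls"
# size/value stay floats so fractional CLS scores are not truncated to 0.
assert context["attributes"]["value"] == 0.6558277147341711
# The timestamp now comes from startTimestamp rather than a payload "timestamp" field.
assert context["timestamp"] == 1753467516.4146557
```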