16 changes: 11 additions & 5 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
@@ -3,6 +3,7 @@
get_start_span_function,
set_data_normalized,
normalize_message_roles,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
@@ -61,12 +62,17 @@ def invoke_agent_span(context, agent, kwargs):

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(
normalized_messages, span, scope
)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)
Bug: Missing Data Normalization in Agent Span

The invoke_agent_span function is missing a call to _normalize_data() before passing normalized_messages to truncate_and_annotate_messages(). This differs from utils.py and means the truncation function receives Python objects instead of the expected serialized data, preventing it from working correctly. _normalize_data also needs to be imported.

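A minimal sketch of what the suggested fix could look like inside invoke_agent_span, mirroring the serialization flow that utils.py uses further down in this PR; the _normalize_data signature is assumed to match that usage and is not confirmed here:

    # Hedged sketch of the suggested fix, mirroring the _set_input_data flow in utils.py.
    # Assumes _normalize_data(..., unpack=False) behaves as it does there; it would
    # also need to be added to this module's imports.
    if len(messages) > 0:
        normalized_messages = normalize_message_roles(messages)
        serializable_messages = _normalize_data(normalized_messages, unpack=False)
        scope = sentry_sdk.get_current_scope()
        messages_data = truncate_and_annotate_messages(
            serializable_messages, span, scope
        )
        if messages_data is not None:
            set_data_normalized(
                span,
                SPANDATA.GEN_AI_REQUEST_MESSAGES,
                messages_data,
                unpack=False,
            )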


_set_agent_data(span, agent)

19 changes: 13 additions & 6 deletions sentry_sdk/integrations/openai_agents/utils.py
@@ -4,6 +4,8 @@
normalize_message_roles,
set_data_normalized,
normalize_message_role,
truncate_and_annotate_messages,
_normalize_data,
)
from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
from sentry_sdk.integrations import DidNotEnable
@@ -135,12 +137,17 @@ def _set_input_data(span, get_response_kwargs):
}
)

set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalize_message_roles(request_messages),
unpack=False,
)
normalized_messages = normalize_message_roles(request_messages)
serializable_messages = _normalize_data(normalized_messages, unpack=False)
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(serializable_messages, span, scope)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)


def _set_output_data(span, result):
102 changes: 90 additions & 12 deletions tests/integrations/openai_agents/test_openai_agents.py
@@ -1,33 +1,33 @@
import asyncio
import json
import os
import re
import pytest
from unittest.mock import MagicMock, patch
import os

from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
from sentry_sdk.integrations.openai_agents.utils import safe_serialize
from sentry_sdk.utils import parse_version

import agents
import pytest
from agents import (
Agent,
ModelResponse,
Usage,
ModelSettings,
Usage,
)
from agents.items import (
McpCall,
ResponseFunctionToolCall,
ResponseOutputMessage,
ResponseOutputText,
ResponseFunctionToolCall,
)
from agents.version import __version__ as OPENAI_AGENTS_VERSION

from openai.types.responses.response_usage import (
InputTokensDetails,
OutputTokensDetails,
)

from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
from sentry_sdk.integrations.openai_agents.utils import safe_serialize
from sentry_sdk.utils import parse_version

test_run_config = agents.RunConfig(tracing_disabled=True)


@@ -1051,8 +1051,8 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):

get_response_kwargs = {"input": test_input}

from sentry_sdk.integrations.openai_agents.utils import _set_input_data
from sentry_sdk import start_span
from sentry_sdk.integrations.openai_agents.utils import _set_input_data

with start_span(op="test") as span:
_set_input_data(span, get_response_kwargs)
@@ -1061,8 +1061,6 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):
from sentry_sdk.consts import SPANDATA

if SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data:
import json

stored_messages = json.loads(span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES])

# Verify roles were properly mapped
@@ -1077,3 +1075,83 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):
# Verify no "ai" roles remain in any message
for message in stored_messages:
assert message["role"] != "ai"


@pytest.mark.asyncio
async def test_openai_agents_message_truncation(
sentry_init, capture_events, test_agent, mock_usage
):
"""Test that large messages are truncated properly in OpenAI Agents integration."""
with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
with patch(
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
) as mock_get_response:
large_content = (
"This is a very long message that will exceed our size limits. " * 1000
)

large_response = ModelResponse(
output=[
ResponseOutputMessage(
id="msg_large",
type="message",
status="completed",
content=[
ResponseOutputText(
text=large_content,
type="output_text",
annotations=[],
)
],
role="assistant",
)
],
usage=mock_usage,
response_id="resp_large",
)

mock_get_response.return_value = large_response

sentry_init(
integrations=[OpenAIAgentsIntegration()],
traces_sample_rate=1.0,
send_default_pii=True,
)

events = capture_events()

# Create messages with mixed large/small content by patching get_response
with patch(
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
) as mock_inner:
mock_inner.side_effect = [large_response] * 5

# We'll test with the agent itself, not the messages
# since OpenAI agents tracks messages internally
result = await agents.Runner.run(
test_agent, "Test input", run_config=test_run_config
)

assert result is not None

assert len(events) > 0
tx = events[0]
assert tx["type"] == "transaction"

# Check ai_client spans (these have the truncation)
ai_client_spans = [
span for span in tx.get("spans", []) if span.get("op") == "gen_ai.chat"
]
assert len(ai_client_spans) > 0

# Just verify that messages are being set and truncation is applied
# The actual truncation behavior is tested in the ai_monitoring tests
ai_client_span = ai_client_spans[0]
if "gen_ai.request.messages" in ai_client_span["data"]:
messages_data = ai_client_span["data"]["gen_ai.request.messages"]
assert isinstance(messages_data, str)

parsed_messages = json.loads(messages_data)
assert isinstance(parsed_messages, list)
# Verify messages were processed
assert len(parsed_messages) >= 1
Comment on lines +1147 to +1157

@alexander-alderman-webb (Contributor) commented on Oct 24, 2025:
Is this checking that truncation is applied?

I would have thought len(parsed_messages) >= 1 is always true, even without truncation.
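One way to make the assertion meaningful would be to check that the oversized content does not survive intact in the stored payload, for example (a sketch; the exact limit enforced by truncate_and_annotate_messages is an assumption and not confirmed here):

    # Sketch of stronger assertions that truncation actually happened. Assumes the
    # truncated payload no longer carries the full oversized message text and ends
    # up smaller than the raw content.
    assert large_content not in messages_data
    assert len(messages_data) < len(large_content)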