@@ -831,6 +831,316 @@ async def run_test():
     assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50
 
 
+def test_pregel_invoke_span_includes_response_model(sentry_init, capture_events):
+    """
+    Test that invoke_agent spans include the response model reported
+    in the agent's LLM response metadata.
+    """
+    sentry_init(
+        integrations=[LanggraphIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    test_state = {
+        "messages": [
+            MockMessage("Hello, can you help me?", name="user"),
+            MockMessage("Of course! How can I assist you?", name="assistant"),
+        ]
+    }
+
+    pregel = MockPregelInstance("test_graph")
+
+    expected_assistant_response = "I'll help you with that task!"
+    expected_tool_calls = [
+        {
+            "id": "call_test_123",
+            "type": "function",
+            "function": {"name": "search_tool", "arguments": '{"query": "help"}'},
+        }
+    ]
+
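+    # Stand-in for Pregel.invoke: returns the input messages plus one
+    # assistant message carrying token usage and model metadata.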
+    def original_invoke(self, *args, **kwargs):
+        input_messages = args[0].get("messages", [])
+        new_messages = input_messages + [
+            MockMessage(
+                content=expected_assistant_response,
+                name="assistant",
+                tool_calls=expected_tool_calls,
+                response_metadata={
+                    "token_usage": {
+                        "total_tokens": 30,
+                        "prompt_tokens": 10,
+                        "completion_tokens": 20,
+                    },
+                    "model_name": "gpt-4.1-2025-04-14",
+                },
+            )
+        ]
+        return {"messages": new_messages}
+
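+    # Wrap the mock with the integration's wrapper and invoke it inside
+    # a transaction so the resulting spans are captured.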
+    with start_transaction():
+        wrapped_invoke = _wrap_pregel_invoke(original_invoke)
+        result = wrapped_invoke(pregel, test_state)
+
+    assert result is not None
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+
+    invoke_spans = [
+        span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    ]
+    assert len(invoke_spans) == 1
+
+    invoke_agent_span = invoke_spans[0]
+
+    # Verify invoke_agent span has response model
+    assert invoke_agent_span["description"] == "invoke_agent test_graph"
+    assert "gen_ai.response.model" in invoke_agent_span["data"]
+    assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14"
+
+
+def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_events):
+    """
+    Async variant: test that invoke_agent spans include the response
+    model reported in the agent's LLM response metadata.
+    """
+    sentry_init(
+        integrations=[LanggraphIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    test_state = {
+        "messages": [
+            MockMessage("Hello, can you help me?", name="user"),
+            MockMessage("Of course! How can I assist you?", name="assistant"),
+        ]
+    }
+
+    pregel = MockPregelInstance("test_graph")
+
+    expected_assistant_response = "I'll help you with that task!"
+    expected_tool_calls = [
+        {
+            "id": "call_test_123",
+            "type": "function",
+            "function": {"name": "search_tool", "arguments": '{"query": "help"}'},
+        }
+    ]
+
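+    # Async stand-in for Pregel.ainvoke: returns the input messages plus
+    # one assistant message carrying token usage and model metadata.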
+    async def original_ainvoke(self, *args, **kwargs):
+        input_messages = args[0].get("messages", [])
+        new_messages = input_messages + [
+            MockMessage(
+                content=expected_assistant_response,
+                name="assistant",
+                tool_calls=expected_tool_calls,
+                response_metadata={
+                    "token_usage": {
+                        "total_tokens": 30,
+                        "prompt_tokens": 10,
+                        "completion_tokens": 20,
+                    },
+                    "model_name": "gpt-4.1-2025-04-14",
+                },
+            )
+        ]
+        return {"messages": new_messages}
+
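+    # Drive the wrapped coroutine inside a transaction so the resulting
+    # spans are captured.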
+    async def run_test():
+        with start_transaction():
+            wrapped_ainvoke = _wrap_pregel_ainvoke(original_ainvoke)
+            result = await wrapped_ainvoke(pregel, test_state)
+            return result
+
+    result = asyncio.run(run_test())
+    assert result is not None
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+
+    invoke_spans = [
+        span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    ]
+    assert len(invoke_spans) == 1
+
+    invoke_agent_span = invoke_spans[0]
+
+    # Verify invoke_agent span has response model
+    assert invoke_agent_span["description"] == "invoke_agent test_graph"
+    assert "gen_ai.response.model" in invoke_agent_span["data"]
+    assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14"
+
+
+def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_events):
+    """
+    Test that when an agent makes multiple LLM calls (e.g., with tools),
+    the invoke_agent span reports the last response model used.
+    """
+    sentry_init(
+        integrations=[LanggraphIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    test_state = {
+        "messages": [
+            MockMessage("Hello, can you help me?", name="user"),
+            MockMessage("Of course! How can I assist you?", name="assistant"),
+        ]
+    }
+
+    pregel = MockPregelInstance("test_graph")
+
+    expected_assistant_response = "I'll help you with that task!"
+    expected_tool_calls = [
+        {
+            "id": "call_test_123",
+            "type": "function",
+            "function": {"name": "search_tool", "arguments": '{"query": "help"}'},
+        }
+    ]
+
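+    # Stand-in for Pregel.invoke: returns two assistant messages from
+    # different models, so the wrapper should record the second one.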
+    def original_invoke(self, *args, **kwargs):
+        input_messages = args[0].get("messages", [])
+        new_messages = input_messages + [
+            MockMessage(
+                content=expected_assistant_response,
+                name="assistant",
+                tool_calls=expected_tool_calls,
+                response_metadata={
+                    "token_usage": {
+                        "total_tokens": 15,
+                        "prompt_tokens": 10,
+                        "completion_tokens": 5,
+                    },
+                    "model_name": "gpt-4-0613",
+                },
+            ),
+            MockMessage(
+                content=expected_assistant_response,
+                name="assistant",
+                tool_calls=expected_tool_calls,
+                response_metadata={
+                    "token_usage": {
+                        "total_tokens": 35,
+                        "prompt_tokens": 20,
+                        "completion_tokens": 15,
+                    },
+                    "model_name": "gpt-4.1-2025-04-14",
+                },
+            ),
+        ]
+        return {"messages": new_messages}
+
+    with start_transaction():
+        wrapped_invoke = _wrap_pregel_invoke(original_invoke)
+        result = wrapped_invoke(pregel, test_state)
+
+    assert result is not None
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+
+    invoke_spans = [
+        span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    ]
+    assert len(invoke_spans) == 1
+
+    invoke_agent_span = invoke_spans[0]
+
+    # Verify invoke_agent span uses the LAST response model
+    assert "gen_ai.response.model" in invoke_agent_span["data"]
+    assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14"
+
+
+def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_events):
+    """
+    Async variant: test that when an agent makes multiple LLM calls
+    (e.g., with tools), the invoke_agent span reports the last response
+    model used.
+    """
+    sentry_init(
+        integrations=[LanggraphIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    test_state = {
+        "messages": [
+            MockMessage("Hello, can you help me?", name="user"),
+            MockMessage("Of course! How can I assist you?", name="assistant"),
+        ]
+    }
+
+    pregel = MockPregelInstance("test_graph")
+
+    expected_assistant_response = "I'll help you with that task!"
+    expected_tool_calls = [
+        {
+            "id": "call_test_123",
+            "type": "function",
+            "function": {"name": "search_tool", "arguments": '{"query": "help"}'},
+        }
+    ]
+
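+    # Async stand-in for Pregel.ainvoke: returns two assistant messages
+    # from different models, so the wrapper should record the second one.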
+    async def original_ainvoke(self, *args, **kwargs):
+        input_messages = args[0].get("messages", [])
+        new_messages = input_messages + [
+            MockMessage(
+                content=expected_assistant_response,
+                name="assistant",
+                tool_calls=expected_tool_calls,
+                response_metadata={
+                    "token_usage": {
+                        "total_tokens": 15,
+                        "prompt_tokens": 10,
+                        "completion_tokens": 5,
+                    },
+                    "model_name": "gpt-4-0613",
+                },
+            ),
+            MockMessage(
+                content=expected_assistant_response,
+                name="assistant",
+                tool_calls=expected_tool_calls,
+                response_metadata={
+                    "token_usage": {
+                        "total_tokens": 35,
+                        "prompt_tokens": 20,
+                        "completion_tokens": 15,
+                    },
+                    "model_name": "gpt-4.1-2025-04-14",
+                },
+            ),
+        ]
+        return {"messages": new_messages}
+
+    async def run_test():
+        with start_transaction():
+            wrapped_ainvoke = _wrap_pregel_ainvoke(original_ainvoke)
+            result = await wrapped_ainvoke(pregel, test_state)
+            return result
+
+    result = asyncio.run(run_test())
+    assert result is not None
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+
+    invoke_spans = [
+        span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    ]
+    assert len(invoke_spans) == 1
+
+    invoke_agent_span = invoke_spans[0]
+
+    # Verify invoke_agent span uses the LAST response model
+    assert "gen_ai.response.model" in invoke_agent_span["data"]
+    assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14"
+
+
 def test_complex_message_parsing():
     """Test message parsing with complex message structures."""
     messages = [