diff --git a/CHANGELOG.md b/CHANGELOG.md index 96aaa67d..59d1ba0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 7.2.0 - 2025-11-28 + +Capture Langchain, OpenAI and Anthropic errors as exceptions (if exception autocapture is enabled) +Add reference to exception in LLMA trace and span events + # 7.1.0 - 2025-11-26 Add support for the async version of Gemini. diff --git a/posthog/ai/langchain/callbacks.py b/posthog/ai/langchain/callbacks.py index 68840a63..db493405 100644 --- a/posthog/ai/langchain/callbacks.py +++ b/posthog/ai/langchain/callbacks.py @@ -22,8 +22,8 @@ try: # LangChain 1.0+ and modern 0.x with langchain-core - from langchain_core.callbacks.base import BaseCallbackHandler from langchain_core.agents import AgentAction, AgentFinish + from langchain_core.callbacks.base import BaseCallbackHandler except (ImportError, ModuleNotFoundError): # Fallback for older LangChain versions from langchain.callbacks.base import BaseCallbackHandler @@ -35,15 +35,15 @@ FunctionMessage, HumanMessage, SystemMessage, - ToolMessage, ToolCall, + ToolMessage, ) from langchain_core.outputs import ChatGeneration, LLMResult from pydantic import BaseModel from posthog import setup -from posthog.ai.utils import get_model_params, with_privacy_mode from posthog.ai.sanitization import sanitize_langchain +from posthog.ai.utils import get_model_params, with_privacy_mode from posthog.client import Client log = logging.getLogger("posthog") @@ -506,6 +506,14 @@ def _capture_trace_or_span( if isinstance(outputs, BaseException): event_properties["$ai_error"] = _stringify_exception(outputs) event_properties["$ai_is_error"] = True + event_properties = _capture_exception_and_update_properties( + self._ph_client, + outputs, + self._distinct_id, + self._groups, + event_properties, + ) + elif outputs is not None: event_properties["$ai_output_state"] = with_privacy_mode( self._ph_client, self._privacy_mode, outputs @@ -576,10 +584,24 @@ def _capture_generation( if run.tools: 
event_properties["$ai_tools"] = run.tools + if self._properties: + event_properties.update(self._properties) + + if self._distinct_id is None: + event_properties["$process_person_profile"] = False + if isinstance(output, BaseException): event_properties["$ai_http_status"] = _get_http_status(output) event_properties["$ai_error"] = _stringify_exception(output) event_properties["$ai_is_error"] = True + + event_properties = _capture_exception_and_update_properties( + self._ph_client, + output, + self._distinct_id, + self._groups, + event_properties, + ) else: # Add usage usage = _parse_usage(output, run.provider, run.model) @@ -607,12 +629,6 @@ def _capture_generation( self._ph_client, self._privacy_mode, completions ) - if self._properties: - event_properties.update(self._properties) - - if self._distinct_id is None: - event_properties["$process_person_profile"] = False - self._ph_client.capture( distinct_id=self._distinct_id or trace_id, event="$ai_generation", @@ -861,6 +877,27 @@ def _parse_usage( return llm_usage +def _capture_exception_and_update_properties( + client: Client, + exception: BaseException, + distinct_id: Optional[Union[str, int, UUID]], + groups: Optional[Dict[str, Any]], + event_properties: Dict[str, Any], +): + if client.enable_exception_autocapture: + exception_id = client.capture_exception( + exception, + distinct_id=distinct_id, + groups=groups, + properties=event_properties, + ) + + if exception_id: + event_properties["$exception_event_id"] = exception_id + + return event_properties + + def _get_http_status(error: BaseException) -> int: # OpenAI: https://github.com/openai/openai-python/blob/main/src/openai/_exceptions.py # Anthropic: https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_exceptions.py diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index 559860cc..a7a8bdb6 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -2,14 +2,15 @@ import uuid from typing import Any, Callable, Dict, List, Optional, 
cast -from posthog.client import Client as PostHogClient -from posthog.ai.types import FormattedMessage, StreamingEventData, TokenUsage +from posthog import identify_context, new_context, tag from posthog.ai.sanitization import ( - sanitize_openai, sanitize_anthropic, sanitize_gemini, sanitize_langchain, + sanitize_openai, ) +from posthog.ai.types import FormattedMessage, StreamingEventData, TokenUsage +from posthog.client import Client as PostHogClient def merge_usage_stats( @@ -256,94 +257,104 @@ def call_llm_and_track_usage( usage: TokenUsage = TokenUsage() error_params: Dict[str, Any] = {} - try: - response = call_method(**kwargs) - except Exception as exc: - error = exc - http_status = getattr( - exc, "status_code", 0 - ) # default to 0 becuase its likely an SDK error - error_params = { - "$ai_is_error": True, - "$ai_error": exc.__str__(), - } - finally: - end_time = time.time() - latency = end_time - start_time - - if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) - - if response and ( - hasattr(response, "usage") - or (provider == "gemini" and hasattr(response, "usage_metadata")) - ): - usage = get_usage(response, provider) - - messages = merge_system_prompt(kwargs, provider) - sanitized_messages = sanitize_messages(messages, provider) - - event_properties = { - "$ai_provider": provider, - "$ai_model": kwargs.get("model"), - "$ai_model_parameters": get_model_params(kwargs), - "$ai_input": with_privacy_mode( - ph_client, posthog_privacy_mode, sanitized_messages - ), - "$ai_output_choices": with_privacy_mode( - ph_client, posthog_privacy_mode, format_response(response, provider) - ), - "$ai_http_status": http_status, - "$ai_input_tokens": usage.get("input_tokens", 0), - "$ai_output_tokens": usage.get("output_tokens", 0), - "$ai_latency": latency, - "$ai_trace_id": posthog_trace_id, - "$ai_base_url": str(base_url), - **(posthog_properties or {}), - **(error_params or {}), - } - - available_tool_calls = extract_available_tool_calls(provider, 
kwargs) - - if available_tool_calls: - event_properties["$ai_tools"] = available_tool_calls - - cache_read = usage.get("cache_read_input_tokens") - if cache_read is not None and cache_read > 0: - event_properties["$ai_cache_read_input_tokens"] = cache_read - - cache_creation = usage.get("cache_creation_input_tokens") - if cache_creation is not None and cache_creation > 0: - event_properties["$ai_cache_creation_input_tokens"] = cache_creation - - reasoning = usage.get("reasoning_tokens") - if reasoning is not None and reasoning > 0: - event_properties["$ai_reasoning_tokens"] = reasoning - - web_search_count = usage.get("web_search_count") - if web_search_count is not None and web_search_count > 0: - event_properties["$ai_web_search_count"] = web_search_count - - if posthog_distinct_id is None: - event_properties["$process_person_profile"] = False - - # Process instructions for Responses API - if provider == "openai" and kwargs.get("instructions") is not None: - event_properties["$ai_instructions"] = with_privacy_mode( - ph_client, posthog_privacy_mode, kwargs.get("instructions") + with new_context(client=ph_client): + if posthog_distinct_id: + identify_context(posthog_distinct_id) + + try: + response = call_method(**kwargs) + except Exception as exc: + error = exc + http_status = getattr( + exc, "status_code", 0 + ) # default to 0 because its likely an SDK error + error_params = { + "$ai_is_error": True, + "$ai_error": exc.__str__(), + } + finally: + end_time = time.time() + latency = end_time - start_time + + if posthog_trace_id is None: + posthog_trace_id = str(uuid.uuid4()) + + if response and ( + hasattr(response, "usage") + or (provider == "gemini" and hasattr(response, "usage_metadata")) + ): + usage = get_usage(response, provider) + + messages = merge_system_prompt(kwargs, provider) + sanitized_messages = sanitize_messages(messages, provider) + + tag("$ai_provider", provider) + tag("$ai_model", kwargs.get("model")) + tag("$ai_model_parameters", 
get_model_params(kwargs)) + tag( + "$ai_input", + with_privacy_mode(ph_client, posthog_privacy_mode, sanitized_messages), ) - - # send the event to posthog - if hasattr(ph_client, "capture") and callable(ph_client.capture): - ph_client.capture( - distinct_id=posthog_distinct_id or posthog_trace_id, - event="$ai_generation", - properties=event_properties, - groups=posthog_groups, + tag( + "$ai_output_choices", + with_privacy_mode( + ph_client, posthog_privacy_mode, format_response(response, provider) + ), ) + tag("$ai_http_status", http_status) + tag("$ai_input_tokens", usage.get("input_tokens", 0)) + tag("$ai_output_tokens", usage.get("output_tokens", 0)) + tag("$ai_latency", latency) + tag("$ai_trace_id", posthog_trace_id) + tag("$ai_base_url", str(base_url)) + + available_tool_calls = extract_available_tool_calls(provider, kwargs) + + if available_tool_calls: + tag("$ai_tools", available_tool_calls) + + cache_read = usage.get("cache_read_input_tokens") + if cache_read is not None and cache_read > 0: + tag("$ai_cache_read_input_tokens", cache_read) + + cache_creation = usage.get("cache_creation_input_tokens") + if cache_creation is not None and cache_creation > 0: + tag("$ai_cache_creation_input_tokens", cache_creation) + + reasoning = usage.get("reasoning_tokens") + if reasoning is not None and reasoning > 0: + tag("$ai_reasoning_tokens", reasoning) + + web_search_count = usage.get("web_search_count") + if web_search_count is not None and web_search_count > 0: + tag("$ai_web_search_count", web_search_count) + + if posthog_distinct_id is None: + tag("$process_person_profile", False) + + # Process instructions for Responses API + if provider == "openai" and kwargs.get("instructions") is not None: + tag( + "$ai_instructions", + with_privacy_mode( + ph_client, posthog_privacy_mode, kwargs.get("instructions") + ), + ) + + # send the event to posthog + if hasattr(ph_client, "capture") and callable(ph_client.capture): + ph_client.capture( + 
distinct_id=posthog_distinct_id or posthog_trace_id, + event="$ai_generation", + properties={ + **(posthog_properties or {}), + **(error_params or {}), + }, + groups=posthog_groups, + ) - if error: - raise error + if error: + raise error return response @@ -367,94 +378,104 @@ async def call_llm_and_track_usage_async( usage: TokenUsage = TokenUsage() error_params: Dict[str, Any] = {} - try: - response = await call_async_method(**kwargs) - except Exception as exc: - error = exc - http_status = getattr( - exc, "status_code", 0 - ) # default to 0 because its likely an SDK error - error_params = { - "$ai_is_error": True, - "$ai_error": exc.__str__(), - } - finally: - end_time = time.time() - latency = end_time - start_time - - if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) - - if response and ( - hasattr(response, "usage") - or (provider == "gemini" and hasattr(response, "usage_metadata")) - ): - usage = get_usage(response, provider) - - messages = merge_system_prompt(kwargs, provider) - sanitized_messages = sanitize_messages(messages, provider) - - event_properties = { - "$ai_provider": provider, - "$ai_model": kwargs.get("model"), - "$ai_model_parameters": get_model_params(kwargs), - "$ai_input": with_privacy_mode( - ph_client, posthog_privacy_mode, sanitized_messages - ), - "$ai_output_choices": with_privacy_mode( - ph_client, posthog_privacy_mode, format_response(response, provider) - ), - "$ai_http_status": http_status, - "$ai_input_tokens": usage.get("input_tokens", 0), - "$ai_output_tokens": usage.get("output_tokens", 0), - "$ai_latency": latency, - "$ai_trace_id": posthog_trace_id, - "$ai_base_url": str(base_url), - **(posthog_properties or {}), - **(error_params or {}), - } - - available_tool_calls = extract_available_tool_calls(provider, kwargs) - - if available_tool_calls: - event_properties["$ai_tools"] = available_tool_calls - - cache_read = usage.get("cache_read_input_tokens") - if cache_read is not None and cache_read > 0: - 
event_properties["$ai_cache_read_input_tokens"] = cache_read - - cache_creation = usage.get("cache_creation_input_tokens") - if cache_creation is not None and cache_creation > 0: - event_properties["$ai_cache_creation_input_tokens"] = cache_creation - - reasoning = usage.get("reasoning_tokens") - if reasoning is not None and reasoning > 0: - event_properties["$ai_reasoning_tokens"] = reasoning - - web_search_count = usage.get("web_search_count") - if web_search_count is not None and web_search_count > 0: - event_properties["$ai_web_search_count"] = web_search_count - - if posthog_distinct_id is None: - event_properties["$process_person_profile"] = False - - # Process instructions for Responses API - if provider == "openai" and kwargs.get("instructions") is not None: - event_properties["$ai_instructions"] = with_privacy_mode( - ph_client, posthog_privacy_mode, kwargs.get("instructions") + with new_context(client=ph_client): + if posthog_distinct_id: + identify_context(posthog_distinct_id) + + try: + response = await call_async_method(**kwargs) + except Exception as exc: + error = exc + http_status = getattr( + exc, "status_code", 0 + ) # default to 0 because its likely an SDK error + error_params = { + "$ai_is_error": True, + "$ai_error": exc.__str__(), + } + finally: + end_time = time.time() + latency = end_time - start_time + + if posthog_trace_id is None: + posthog_trace_id = str(uuid.uuid4()) + + if response and ( + hasattr(response, "usage") + or (provider == "gemini" and hasattr(response, "usage_metadata")) + ): + usage = get_usage(response, provider) + + messages = merge_system_prompt(kwargs, provider) + sanitized_messages = sanitize_messages(messages, provider) + + tag("$ai_provider", provider) + tag("$ai_model", kwargs.get("model")) + tag("$ai_model_parameters", get_model_params(kwargs)) + tag( + "$ai_input", + with_privacy_mode(ph_client, posthog_privacy_mode, sanitized_messages), ) - - # send the event to posthog - if hasattr(ph_client, "capture") and 
callable(ph_client.capture): - ph_client.capture( - distinct_id=posthog_distinct_id or posthog_trace_id, - event="$ai_generation", - properties=event_properties, - groups=posthog_groups, + tag( + "$ai_output_choices", + with_privacy_mode( + ph_client, posthog_privacy_mode, format_response(response, provider) + ), ) + tag("$ai_http_status", http_status) + tag("$ai_input_tokens", usage.get("input_tokens", 0)) + tag("$ai_output_tokens", usage.get("output_tokens", 0)) + tag("$ai_latency", latency) + tag("$ai_trace_id", posthog_trace_id) + tag("$ai_base_url", str(base_url)) + + available_tool_calls = extract_available_tool_calls(provider, kwargs) + + if available_tool_calls: + tag("$ai_tools", available_tool_calls) + + cache_read = usage.get("cache_read_input_tokens") + if cache_read is not None and cache_read > 0: + tag("$ai_cache_read_input_tokens", cache_read) + + cache_creation = usage.get("cache_creation_input_tokens") + if cache_creation is not None and cache_creation > 0: + tag("$ai_cache_creation_input_tokens", cache_creation) + + reasoning = usage.get("reasoning_tokens") + if reasoning is not None and reasoning > 0: + tag("$ai_reasoning_tokens", reasoning) + + web_search_count = usage.get("web_search_count") + if web_search_count is not None and web_search_count > 0: + tag("$ai_web_search_count", web_search_count) + + if posthog_distinct_id is None: + tag("$process_person_profile", False) + + # Process instructions for Responses API + if provider == "openai" and kwargs.get("instructions") is not None: + tag( + "$ai_instructions", + with_privacy_mode( + ph_client, posthog_privacy_mode, kwargs.get("instructions") + ), + ) + + # send the event to posthog + if hasattr(ph_client, "capture") and callable(ph_client.capture): + ph_client.capture( + distinct_id=posthog_distinct_id or posthog_trace_id, + event="$ai_generation", + properties={ + **(posthog_properties or {}), + **(error_params or {}), + }, + groups=posthog_groups, + ) - if error: - raise error + if error: 
+ raise error return response diff --git a/posthog/client.py b/posthog/client.py index 3c4d8b89..bb11bf99 100644 --- a/posthog/client.py +++ b/posthog/client.py @@ -4,24 +4,33 @@ import sys from datetime import datetime, timedelta from typing import Any, Dict, Optional, Union -from typing_extensions import Unpack from uuid import uuid4 from dateutil.tz import tzutc from six import string_types +from typing_extensions import Unpack -from posthog.args import OptionalCaptureArgs, OptionalSetArgs, ID_TYPES, ExceptionArg +from posthog.args import ID_TYPES, ExceptionArg, OptionalCaptureArgs, OptionalSetArgs from posthog.consumer import Consumer +from posthog.contexts import ( + _get_current_context, + get_capture_exception_code_variables_context, + get_code_variables_ignore_patterns_context, + get_code_variables_mask_patterns_context, + get_context_distinct_id, + get_context_session_id, + new_context, +) from posthog.exception_capture import ExceptionCapture from posthog.exception_utils import ( + DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS, + DEFAULT_CODE_VARIABLES_MASK_PATTERNS, exc_info_from_error, + exception_is_already_captured, exceptions_from_error_tuple, handle_in_app, - exception_is_already_captured, mark_exception_as_captured, try_attach_code_variables_to_frames, - DEFAULT_CODE_VARIABLES_MASK_PATTERNS, - DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS, ) from posthog.feature_flags import ( InconclusiveMatchError, @@ -38,15 +47,6 @@ get, remote_config, ) -from posthog.contexts import ( - _get_current_context, - get_context_distinct_id, - get_context_session_id, - get_capture_exception_code_variables_context, - get_code_variables_mask_patterns_context, - get_code_variables_ignore_patterns_context, - new_context, -) from posthog.types import ( FeatureFlag, FeatureFlagResult, @@ -2016,9 +2016,9 @@ def _initialize_flag_cache(self, cache_url): return None try: - from urllib.parse import urlparse, parse_qs + from urllib.parse import parse_qs, urlparse except ImportError: - from 
urlparse import urlparse, parse_qs + from urlparse import parse_qs, urlparse try: parsed = urlparse(cache_url) diff --git a/posthog/test/ai/test_system_prompts.py b/posthog/test/ai/test_system_prompts.py index 2f37ccc4..eb049c86 100644 --- a/posthog/test/ai/test_system_prompts.py +++ b/posthog/test/ai/test_system_prompts.py @@ -11,7 +11,10 @@ import time import unittest -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + +from posthog.client import Client +from posthog.test.test_utils import FAKE_TEST_API_KEY class TestSystemPromptCapture(unittest.TestCase): @@ -24,7 +27,8 @@ def setUp(self): self.test_response = "I'm doing well, thank you!" # Create mock PostHog client - self.client = MagicMock() + self.client = Client(FAKE_TEST_API_KEY) + self.client._enqueue = MagicMock() self.client.privacy_mode = False def _assert_system_prompt_captured(self, captured_input): @@ -53,10 +57,11 @@ def _assert_system_prompt_captured(self, captured_input): def test_openai_messages_array_system_prompt(self): """Test OpenAI with system prompt in messages array.""" try: - from posthog.ai.openai import OpenAI from openai.types.chat import ChatCompletion, ChatCompletionMessage from openai.types.chat.chat_completion import Choice from openai.types.completion_usage import CompletionUsage + + from posthog.ai.openai import OpenAI except ImportError: self.skipTest("OpenAI package not available") @@ -94,17 +99,18 @@ def test_openai_messages_array_system_prompt(self): model="gpt-4", messages=messages, posthog_distinct_id="test-user" ) - self.assertEqual(len(self.client.capture.call_args_list), 1) - properties = self.client.capture.call_args_list[0][1]["properties"] + self.assertEqual(len(self.client._enqueue.call_args_list), 1) + properties = self.client._enqueue.call_args_list[0][0][0]["properties"] self._assert_system_prompt_captured(properties["$ai_input"]) def test_openai_separate_system_parameter(self): """Test OpenAI with system prompt as separate 
parameter.""" try: - from posthog.ai.openai import OpenAI from openai.types.chat import ChatCompletion, ChatCompletionMessage from openai.types.chat.chat_completion import Choice from openai.types.completion_usage import CompletionUsage + + from posthog.ai.openai import OpenAI except ImportError: self.skipTest("OpenAI package not available") @@ -142,18 +148,21 @@ def test_openai_separate_system_parameter(self): posthog_distinct_id="test-user", ) - self.assertEqual(len(self.client.capture.call_args_list), 1) - properties = self.client.capture.call_args_list[0][1]["properties"] + self.assertEqual(len(self.client._enqueue.call_args_list), 1) + properties = self.client._enqueue.call_args_list[0][0][0]["properties"] self._assert_system_prompt_captured(properties["$ai_input"]) def test_openai_streaming_system_parameter(self): """Test OpenAI streaming with system parameter.""" try: - from posthog.ai.openai import OpenAI - from openai.types.chat.chat_completion_chunk import ChatCompletionChunk + from openai.types.chat.chat_completion_chunk import ( + ChatCompletionChunk, + ChoiceDelta, + ) from openai.types.chat.chat_completion_chunk import Choice as ChoiceChunk - from openai.types.chat.chat_completion_chunk import ChoiceDelta from openai.types.completion_usage import CompletionUsage + + from posthog.ai.openai import OpenAI except ImportError: self.skipTest("OpenAI package not available") @@ -206,8 +215,8 @@ def test_openai_streaming_system_parameter(self): list(response_generator) # Consume generator - self.assertEqual(len(self.client.capture.call_args_list), 1) - properties = self.client.capture.call_args_list[0][1]["properties"] + self.assertEqual(len(self.client._enqueue.call_args_list), 1) + properties = self.client._enqueue.call_args_list[0][0][0]["properties"] self._assert_system_prompt_captured(properties["$ai_input"]) # Anthropic Tests @@ -239,8 +248,8 @@ def test_anthropic_messages_array_system_prompt(self): posthog_distinct_id="test-user", ) - 
self.assertEqual(len(self.client.capture.call_args_list), 1) - properties = self.client.capture.call_args_list[0][1]["properties"] + self.assertEqual(len(self.client._enqueue.call_args_list), 1) + properties = self.client._enqueue.call_args_list[0][0][0]["properties"] self._assert_system_prompt_captured(properties["$ai_input"]) def test_anthropic_separate_system_parameter(self): @@ -269,8 +278,8 @@ def test_anthropic_separate_system_parameter(self): posthog_distinct_id="test-user", ) - self.assertEqual(len(self.client.capture.call_args_list), 1) - properties = self.client.capture.call_args_list[0][1]["properties"] + self.assertEqual(len(self.client._enqueue.call_args_list), 1) + properties = self.client._enqueue.call_args_list[0][0][0]["properties"] self._assert_system_prompt_captured(properties["$ai_input"]) # Gemini Tests @@ -310,8 +319,8 @@ def test_gemini_contents_array_system_prompt(self): posthog_distinct_id="test-user", ) - self.assertEqual(len(self.client.capture.call_args_list), 1) - properties = self.client.capture.call_args_list[0][1]["properties"] + self.assertEqual(len(self.client._enqueue.call_args_list), 1) + properties = self.client._enqueue.call_args_list[0][0][0]["properties"] self._assert_system_prompt_captured(properties["$ai_input"]) def test_gemini_system_instruction_parameter(self): @@ -349,6 +358,6 @@ def test_gemini_system_instruction_parameter(self): posthog_distinct_id="test-user", ) - self.assertEqual(len(self.client.capture.call_args_list), 1) - properties = self.client.capture.call_args_list[0][1]["properties"] + self.assertEqual(len(self.client._enqueue.call_args_list), 1) + properties = self.client._enqueue.call_args_list[0][0][0]["properties"] self._assert_system_prompt_captured(properties["$ai_input"]) diff --git a/posthog/version.py b/posthog/version.py index 75b03c92..bb603866 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "7.1.0" +VERSION = "7.2.0" if __name__ == "__main__": print(VERSION, 
end="") # noqa: T201