6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,9 @@
# 7.4.1 - 2025-12-19

fix: extract model from response for OpenAI stored prompts

When using OpenAI stored prompts, the model is defined in the OpenAI dashboard rather than passed in the API request. This fix adds a fallback to extract the model from the response object when not provided in kwargs, ensuring generations show up with the correct model and enabling cost calculations.

# 7.4.0 - 2025-12-16

feat: Add automatic retries for feature flag requests
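A minimal sketch of the fallback described in the changelog entry above (stand-alone Python, not the SDK code itself; model names are illustrative): the capture step prefers the `model` passed in kwargs, then the model reported by the response, then `"unknown"`.

```python
from typing import Optional


def resolve_model(kwargs: dict, model_from_response: Optional[str]) -> str:
    # Prefer the explicit request parameter; stored prompts omit it, so fall
    # back to the model reported by the response, then to "unknown".
    return kwargs.get("model") or model_from_response or "unknown"


# Stored prompt: the request carries no "model", so the response value is used.
assert resolve_model({}, "gpt-4.1-mini") == "gpt-4.1-mini"
# Regular call: the request parameter wins.
assert resolve_model({"model": "gpt-4o"}, "gpt-4o-2024-08-06") == "gpt-4o"
```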
29 changes: 27 additions & 2 deletions posthog/ai/openai/openai.py
@@ -124,14 +124,23 @@ def _create_streaming(
start_time = time.time()
usage_stats: TokenUsage = TokenUsage()
final_content = []
model_from_response: Optional[str] = None
response = self._original.create(**kwargs)

def generator():
nonlocal usage_stats
nonlocal final_content # noqa: F824
nonlocal model_from_response

try:
for chunk in response:
# Extract model from response object in chunk (for stored prompts)
if hasattr(chunk, "response") and chunk.response:
if model_from_response is None and hasattr(
chunk.response, "model"
):
model_from_response = chunk.response.model

# Extract usage stats from chunk
chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")

@@ -161,6 +170,7 @@ def generator():
latency,
output,
None, # Responses API doesn't have tools
model_from_response,
)

return generator()
@@ -177,6 +187,7 @@ def _capture_streaming_event(
latency: float,
output: Any,
available_tool_calls: Optional[List[Dict[str, Any]]] = None,
model_from_response: Optional[str] = None,
):
from posthog.ai.types import StreamingEventData
from posthog.ai.openai.openai_converter import (
@@ -189,9 +200,12 @@ def _capture_streaming_event(
formatted_input = format_openai_streaming_input(kwargs, "responses")
sanitized_input = sanitize_openai_response(formatted_input)

# Use model from kwargs, fallback to model from response
model = kwargs.get("model") or model_from_response or "unknown"

event_data = StreamingEventData(
provider="openai",
model=kwargs.get("model", "unknown"),
model=model,
base_url=str(self._client.base_url),
kwargs=kwargs,
formatted_input=sanitized_input,
@@ -320,6 +334,7 @@ def _create_streaming(
usage_stats: TokenUsage = TokenUsage()
accumulated_content = []
accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
model_from_response: Optional[str] = None
if "stream_options" not in kwargs:
kwargs["stream_options"] = {}
kwargs["stream_options"]["include_usage"] = True
@@ -329,9 +344,14 @@ def generator():
nonlocal usage_stats
nonlocal accumulated_content # noqa: F824
nonlocal accumulated_tool_calls
nonlocal model_from_response

try:
for chunk in response:
# Extract model from chunk (Chat Completions chunks have model field)
if model_from_response is None and hasattr(chunk, "model"):
model_from_response = chunk.model

# Extract usage stats from chunk
chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")

@@ -376,6 +396,7 @@ def generator():
accumulated_content,
tool_calls_list,
extract_available_tool_calls("openai", kwargs),
model_from_response,
)

return generator()
@@ -393,6 +414,7 @@ def _capture_streaming_event(
output: Any,
tool_calls: Optional[List[Dict[str, Any]]] = None,
available_tool_calls: Optional[List[Dict[str, Any]]] = None,
model_from_response: Optional[str] = None,
):
from posthog.ai.types import StreamingEventData
from posthog.ai.openai.openai_converter import (
@@ -405,9 +427,12 @@ def _capture_streaming_event(
formatted_input = format_openai_streaming_input(kwargs, "chat")
sanitized_input = sanitize_openai(formatted_input)

# Use model from kwargs, fallback to model from response
model = kwargs.get("model") or model_from_response or "unknown"

event_data = StreamingEventData(
provider="openai",
model=kwargs.get("model", "unknown"),
model=model,
base_url=str(self._client.base_url),
kwargs=kwargs,
formatted_input=sanitized_input,
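The two streaming wrappers above read the model from different places: Responses API events nest it under `chunk.response`, while Chat Completions chunks expose a `model` field directly. A minimal sketch with stand-in chunk objects (not the real SDK types; in the actual code the two branches live in separate wrappers):

```python
from dataclasses import dataclass, field
from typing import Iterable, Optional


@dataclass
class _Response:
    model: str = "gpt-4.1"


@dataclass
class _ResponsesChunk:
    # Responses API streaming events carry a nested response object.
    response: Optional[_Response] = field(default_factory=_Response)


@dataclass
class _ChatChunk:
    # Chat Completions chunks carry the model field directly.
    model: str = "gpt-4.1"


def model_from_stream(chunks: Iterable[object]) -> Optional[str]:
    model_from_response: Optional[str] = None
    for chunk in chunks:
        if hasattr(chunk, "response") and chunk.response:
            if model_from_response is None and hasattr(chunk.response, "model"):
                model_from_response = chunk.response.model
        elif model_from_response is None and hasattr(chunk, "model"):
            model_from_response = chunk.model
    return model_from_response


print(model_from_stream([_ResponsesChunk()]))  # gpt-4.1
print(model_from_stream([_ChatChunk()]))       # gpt-4.1
```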
29 changes: 27 additions & 2 deletions posthog/ai/openai/openai_async.py
@@ -128,14 +128,23 @@ async def _create_streaming(
start_time = time.time()
usage_stats: TokenUsage = TokenUsage()
final_content = []
model_from_response: Optional[str] = None
response = await self._original.create(**kwargs)

async def async_generator():
nonlocal usage_stats
nonlocal final_content # noqa: F824
nonlocal model_from_response

try:
async for chunk in response:
# Extract model from response object in chunk (for stored prompts)
if hasattr(chunk, "response") and chunk.response:
if model_from_response is None and hasattr(
chunk.response, "model"
):
model_from_response = chunk.response.model

# Extract usage stats from chunk
chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")

@@ -166,6 +175,7 @@ async def async_generator():
latency,
output,
extract_available_tool_calls("openai", kwargs),
model_from_response,
)

return async_generator()
@@ -182,13 +192,17 @@ async def _capture_streaming_event(
latency: float,
output: Any,
available_tool_calls: Optional[List[Dict[str, Any]]] = None,
model_from_response: Optional[str] = None,
):
if posthog_trace_id is None:
posthog_trace_id = str(uuid.uuid4())

# Use model from kwargs, fallback to model from response
model = kwargs.get("model") or model_from_response or "unknown"

event_properties = {
"$ai_provider": "openai",
"$ai_model": kwargs.get("model"),
"$ai_model": model,
"$ai_model_parameters": get_model_params(kwargs),
"$ai_input": with_privacy_mode(
self._client._ph_client,
@@ -350,6 +364,7 @@ async def _create_streaming(
usage_stats: TokenUsage = TokenUsage()
accumulated_content = []
accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
model_from_response: Optional[str] = None

if "stream_options" not in kwargs:
kwargs["stream_options"] = {}
@@ -360,9 +375,14 @@ async def async_generator():
nonlocal usage_stats
nonlocal accumulated_content # noqa: F824
nonlocal accumulated_tool_calls
nonlocal model_from_response

try:
async for chunk in response:
# Extract model from chunk (Chat Completions chunks have model field)
if model_from_response is None and hasattr(chunk, "model"):
model_from_response = chunk.model

# Extract usage stats from chunk
chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
if chunk_usage:
@@ -405,6 +425,7 @@ async def async_generator():
accumulated_content,
tool_calls_list,
extract_available_tool_calls("openai", kwargs),
model_from_response,
)

return async_generator()
@@ -422,13 +443,17 @@ async def _capture_streaming_event(
output: Any,
tool_calls: Optional[List[Dict[str, Any]]] = None,
available_tool_calls: Optional[List[Dict[str, Any]]] = None,
model_from_response: Optional[str] = None,
):
if posthog_trace_id is None:
posthog_trace_id = str(uuid.uuid4())

# Use model from kwargs, fallback to model from response
model = kwargs.get("model") or model_from_response or "unknown"

event_properties = {
"$ai_provider": "openai",
"$ai_model": kwargs.get("model"),
"$ai_model": model,
"$ai_model_parameters": get_model_params(kwargs),
"$ai_input": with_privacy_mode(
self._client._ph_client,
4 changes: 2 additions & 2 deletions posthog/ai/utils.py
@@ -285,7 +285,7 @@ def call_llm_and_track_usage(

event_properties = {
"$ai_provider": provider,
"$ai_model": kwargs.get("model"),
"$ai_model": kwargs.get("model") or getattr(response, "model", None),
"$ai_model_parameters": get_model_params(kwargs),
"$ai_input": with_privacy_mode(
ph_client, posthog_privacy_mode, sanitized_messages
@@ -396,7 +396,7 @@ async def call_llm_and_track_usage_async(

event_properties = {
"$ai_provider": provider,
"$ai_model": kwargs.get("model"),
"$ai_model": kwargs.get("model") or getattr(response, "model", None),
"$ai_model_parameters": get_model_params(kwargs),
"$ai_input": with_privacy_mode(
ph_client, posthog_privacy_mode, sanitized_messages
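For the non-streaming path in posthog/ai/utils.py, the same idea is a one-liner: when kwargs carries no `model` (as with stored prompts), `getattr` pulls it off the response object. A minimal sketch with a stand-in response (not the real SDK type; the model name is illustrative):

```python
from types import SimpleNamespace

kwargs: dict = {}  # stored-prompt call: the model is configured in the OpenAI dashboard
response = SimpleNamespace(model="gpt-4.1-mini")  # stand-in for the SDK response object

ai_model = kwargs.get("model") or getattr(response, "model", None)
print(ai_model)  # gpt-4.1-mini
```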