Commit 7db5ce2

Exclude '*mini' models from prompt_cache_retention (#1345)
Co-authored-by: openhands <openhands@all-hands.dev>
1 parent: af88f69

File tree

3 files changed (+109, -37 lines)

openhands-sdk/openhands/sdk/llm/utils/model_features.py

Lines changed: 58 additions & 24 deletions
@@ -15,6 +15,28 @@ def model_matches(model: str, patterns: list[str]) -> bool:
     return False


+def apply_ordered_model_rules(model: str, rules: list[str]) -> bool:
+    """Apply ordered include/exclude model rules to determine final support.
+
+    Rules semantics:
+    - Each entry is a substring token. '!' prefix marks an exclude rule.
+    - Case-insensitive substring matching against the raw model string.
+    - Evaluated in order; the last matching rule wins.
+    - If no rule matches, returns False.
+    """
+    raw = (model or "").strip().lower()
+    decided: bool | None = None
+    for rule in rules:
+        token = rule.strip().lower()
+        if not token:
+            continue
+        is_exclude = token.startswith("!")
+        core = token[1:] if is_exclude else token
+        if core and core in raw:
+            decided = not is_exclude
+    return bool(decided)
+
+
 @dataclass(frozen=True)
 class ModelFeatures:
     supports_reasoning_effort: bool
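
To make the last-wins semantics concrete, here is a short trace of the new helper (a sketch; the rule list mirrors the retention rules added below, and the model strings are illustrative):

    rules = ["gpt-5", "!mini", "gpt-5.1-codex-mini"]

    apply_ordered_model_rules("openai/gpt-5.2", rules)             # True: only "gpt-5" matches
    apply_ordered_model_rules("openai/gpt-5-mini", rules)          # False: "gpt-5" allows, then "!mini" excludes
    apply_ordered_model_rules("openai/gpt-5.1-codex-mini", rules)  # True: the final rule re-allows it
    apply_ordered_model_rules("gpt-4o", rules)                     # False: no rule matches, defaults to False
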
@@ -27,9 +49,9 @@ class ModelFeatures:
     supports_prompt_cache_retention: bool


-# Pattern tables capturing current behavior. Keep patterns lowercase.
+# Model lists capturing current behavior. Keep entries lowercase.

-REASONING_EFFORT_PATTERNS: list[str] = [
+REASONING_EFFORT_MODELS: list[str] = [
     # Mirror main behavior exactly (no unintended expansion)
     "o1-2024-12-17",
     "o1",
@@ -47,15 +69,15 @@ class ModelFeatures:
     "claude-opus-4-5",
 ]

-EXTENDED_THINKING_PATTERNS: list[str] = [
+EXTENDED_THINKING_MODELS: list[str] = [
     # Anthropic model family
     # We did not include sonnet 3.7 and 4 here as they don't bring
     # significant performance improvements for agents
     "claude-sonnet-4-5",
     "claude-haiku-4-5",
 ]

-PROMPT_CACHE_PATTERNS: list[str] = [
+PROMPT_CACHE_MODELS: list[str] = [
     "claude-3-7-sonnet",
     "claude-sonnet-3-7-latest",
     "claude-3-5-sonnet",
@@ -70,14 +92,27 @@ class ModelFeatures:
 ]

 # Models that support a top-level prompt_cache_retention parameter
-PROMPT_CACHE_RETENTION_PATTERNS: list[str] = [
-    # OpenAI GPT-5+ family
+# Source: OpenAI Prompt Caching docs (extended retention), which list:
+# - gpt-5.2
+# - gpt-5.1
+# - gpt-5.1-codex
+# - gpt-5.1-codex-mini
+# - gpt-5.1-chat-latest
+# - gpt-5
+# - gpt-5-codex
+# - gpt-4.1
+# Use ordered include/exclude rules (last wins) to naturally express exceptions.
+PROMPT_CACHE_RETENTION_MODELS: list[str] = [
+    # Broad allow for GPT-5 family and GPT-4.1 (covers gpt-5.2 and variants)
     "gpt-5",
-    # GPT-4.1 too
     "gpt-4.1",
+    # Exclude all mini variants by default
+    "!mini",
+    # Re-allow the explicitly documented supported mini variant
+    "gpt-5.1-codex-mini",
 ]

-SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
+SUPPORTS_STOP_WORDS_FALSE_MODELS: list[str] = [
     # o-series families don't support stop words
     "o1",
     "o3",
@@ -89,7 +124,7 @@ class ModelFeatures:
 ]

 # Models that should use the OpenAI Responses API path by default
-RESPONSES_API_PATTERNS: list[str] = [
+RESPONSES_API_MODELS: list[str] = [
     # OpenAI GPT-5 family (includes mini variants)
     "gpt-5",
     # OpenAI Codex (uses Responses API)
@@ -101,7 +136,7 @@ class ModelFeatures:
 # and need plain strings instead
 # NOTE: model_matches uses case-insensitive substring matching, not globbing.
 # Keep these entries as bare substrings without wildcards.
-FORCE_STRING_SERIALIZER_PATTERNS: list[str] = [
+FORCE_STRING_SERIALIZER_MODELS: list[str] = [
     "deepseek",  # e.g., DeepSeek-V3.2-Exp
     "glm",  # e.g., GLM-4.5 / GLM-4.6
     # Kimi K2-Instruct requires string serialization only on Groq
@@ -110,32 +145,31 @@ class ModelFeatures:

 # Models that we should send full reasoning content
 # in the message input
-SEND_REASONING_CONTENT_PATTERNS: list[str] = [
+SEND_REASONING_CONTENT_MODELS: list[str] = [
     "kimi-k2-thinking",
 ]


 def get_features(model: str) -> ModelFeatures:
     """Get model features."""
     return ModelFeatures(
-        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_PATTERNS),
-        supports_extended_thinking=model_matches(model, EXTENDED_THINKING_PATTERNS),
-        supports_prompt_cache=model_matches(model, PROMPT_CACHE_PATTERNS),
-        supports_stop_words=not model_matches(
-            model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS
-        ),
-        supports_responses_api=model_matches(model, RESPONSES_API_PATTERNS),
-        force_string_serializer=model_matches(model, FORCE_STRING_SERIALIZER_PATTERNS),
-        send_reasoning_content=model_matches(model, SEND_REASONING_CONTENT_PATTERNS),
-        supports_prompt_cache_retention=model_matches(
-            model, PROMPT_CACHE_RETENTION_PATTERNS
+        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_MODELS),
+        supports_extended_thinking=model_matches(model, EXTENDED_THINKING_MODELS),
+        supports_prompt_cache=model_matches(model, PROMPT_CACHE_MODELS),
+        supports_stop_words=not model_matches(model, SUPPORTS_STOP_WORDS_FALSE_MODELS),
+        supports_responses_api=model_matches(model, RESPONSES_API_MODELS),
+        force_string_serializer=model_matches(model, FORCE_STRING_SERIALIZER_MODELS),
+        send_reasoning_content=model_matches(model, SEND_REASONING_CONTENT_MODELS),
+        # Extended prompt_cache_retention support follows ordered include/exclude rules.
+        supports_prompt_cache_retention=apply_ordered_model_rules(
+            model, PROMPT_CACHE_RETENTION_MODELS
         ),
     )


 # Default temperature mapping.
 # Each entry: (pattern, default_temperature)
-DEFAULT_TEMPERATURE_PATTERNS: list[tuple[str, float]] = [
+DEFAULT_TEMPERATURE_MODELS: list[tuple[str, float]] = [
     ("kimi-k2-thinking", 1.0),
 ]

@@ -145,7 +179,7 @@ def get_default_temperature(model: str) -> float:

     Uses case-insensitive substring matching via model_matches.
     """
-    for pattern, value in DEFAULT_TEMPERATURE_PATTERNS:
+    for pattern, value in DEFAULT_TEMPERATURE_MODELS:
         if model_matches(model, [pattern]):
             return value
     return 0.0
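
Taken together, the renamed tables and the new rule evaluation behave as follows (a minimal sketch, assuming the module path shown in the file header; model strings are illustrative):

    from openhands.sdk.llm.utils.model_features import (
        get_default_temperature,
        get_features,
    )

    assert get_features("openai/gpt-5.2").supports_prompt_cache_retention         # broad "gpt-5" allow
    assert not get_features("openai/gpt-5-mini").supports_prompt_cache_retention  # "!mini" wins
    assert get_features("openai/gpt-5.1-codex-mini").supports_prompt_cache_retention

    assert get_default_temperature("moonshot/kimi-k2-thinking") == 1.0  # matched entry
    assert get_default_temperature("gpt-4o") == 0.0                     # fallback default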

tests/sdk/llm/test_model_features.py

Lines changed: 8 additions & 2 deletions
@@ -250,13 +250,19 @@ def test_force_string_serializer_full_model_names():
         ("gpt-5.1", True),
         ("openai/gpt-5.1-codex-mini", True),
         ("gpt-5", True),
-        ("openai/gpt-5-mini", True),
+        # New GPT-5.2 family should support extended retention
+        ("gpt-5.2", True),
+        ("openai/gpt-5.2-chat-latest", True),
+        ("openai/gpt-5.2-pro", True),
+        ("openai/gpt-5-mini", False),
         ("gpt-4o", False),
         ("openai/gpt-4.1", True),
         ("litellm_proxy/gpt-4.1", True),
         ("litellm_proxy/openai/gpt-4.1", True),
         ("litellm_proxy/openai/gpt-5", True),
-        ("litellm_proxy/openai/gpt-5-mini", True),
+        ("litellm_proxy/openai/gpt-5-mini", False),
+        ("openai/gpt-5.1-mini", False),
+        ("openai/gpt-5-mini-2025-08-07", False),
     ],
 )
 def test_prompt_cache_retention_support(model, expected_retention):
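
To run just the retention cases locally, something like the following should work (assuming a standard pytest setup):

    pytest tests/sdk/llm/test_model_features.py -k prompt_cache_retention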

tests/sdk/llm/test_responses_parsing_and_kwargs.py

Lines changed: 43 additions & 11 deletions
@@ -167,20 +167,52 @@ def test_responses_reasoning_effort_none_not_sent_for_gpt_5_1(model):


 def test_chat_and_responses_options_prompt_cache_retention_gpt_5_plus_and_non_gpt():
-    # GPT-5+ should include prompt_cache_retention as a top-level arg
-    llm_51 = LLM(model="openai/gpt-5.1-codex-mini")
-    opts_51_chat = select_chat_options(llm_51, {}, has_tools=False)
-    assert opts_51_chat.get("prompt_cache_retention") == "24h"
+    # Confirm allowed: 5.1 codex mini supports extended retention per docs
+    llm_51_codex_mini = LLM(model="openai/gpt-5.1-codex-mini")
+    opts_51_codex_mini_resp = select_responses_options(
+        llm_51_codex_mini, {}, include=None, store=None
+    )
+    assert opts_51_codex_mini_resp.get("prompt_cache_retention") == "24h"

-    opts_51_resp = select_responses_options(llm_51, {}, include=None, store=None)
-    assert opts_51_resp.get("prompt_cache_retention") == "24h"
+    # New GPT-5.2 variants should include prompt_cache_retention
+    llm_52 = LLM(model="openai/gpt-5.2")
+    assert (
+        select_chat_options(llm_52, {}, has_tools=False).get("prompt_cache_retention")
+        == "24h"
+    )
+    assert (
+        select_responses_options(llm_52, {}, include=None, store=None).get(
+            "prompt_cache_retention"
+        )
+        == "24h"
+    )
+
+    llm_52_chat_latest = LLM(model="openai/gpt-5.2-chat-latest")
+    assert (
+        select_chat_options(llm_52_chat_latest, {}, has_tools=False).get(
+            "prompt_cache_retention"
+        )
+        == "24h"
+    )

-    llm_5 = LLM(model="openai/gpt-5-mini")
-    opts_5_chat = select_chat_options(llm_5, {}, has_tools=False)
-    assert opts_5_chat.get("prompt_cache_retention") == "24h"
+    # GPT-5.1 (non-mini) should include prompt_cache_retention; mini variants should not
+    llm_51_mini = LLM(model="openai/gpt-5.1-mini")
+    opts_51_mini_chat = select_chat_options(llm_51_mini, {}, has_tools=False)
+    assert "prompt_cache_retention" not in opts_51_mini_chat

-    opts_5_resp = select_responses_options(llm_5, {}, include=None, store=None)
-    assert opts_5_resp.get("prompt_cache_retention") == "24h"
+    opts_51_mini_resp = select_responses_options(
+        llm_51_mini, {}, include=None, store=None
+    )
+    assert "prompt_cache_retention" not in opts_51_mini_resp
+
+    llm_5_mini = LLM(model="openai/gpt-5-mini")
+    opts_5_mini_chat = select_chat_options(llm_5_mini, {}, has_tools=False)
+    assert "prompt_cache_retention" not in opts_5_mini_chat
+
+    opts_5_mini_resp = select_responses_options(
+        llm_5_mini, {}, include=None, store=None
+    )
+    assert "prompt_cache_retention" not in opts_5_mini_resp

     # Non-GPT-5.1 should not include it at all
     llm_other = LLM(model="gpt-4o")
