Commit 3c0ea36

add bandit external mode
1 parent bddb070 commit 3c0ea36

File tree: 3 files changed (+70, -3 lines)

README.md

Lines changed: 7 additions & 2 deletions

@@ -56,9 +56,14 @@ We do not apply the importance sampling ratio because the policy changes slowly
 
 ### External Modes
 
-`external.mode` is set to be 'level_feedback' by default. This gives additional information from external to prompts in the following turns; 'level_feedback' attaches test-driven diagnostics, while alternatives include 'expert_edits' (an LLM proposes edits), 'level_passed'/'passed' (binary outcomes), and 'plain' (no signals).
+`external.mode` is set to 'level_feedback' by default. This adds information from the external environment to the prompts in the following turns; 'level_feedback' attaches test-driven diagnostics, while the alternatives are:
 
-Specific settings for 'level_feedback' is `external.sandbox_slice`, which controls how many eval tests to include in the feedback. By default, sandbox executes only the first assert (sandbox_slice=1). Use all eval tests by setting `external.sandbox_slice` to 0, None, or 'all'. Negative values use the last asserts. `external.sandbox_slice` only affects analysis-based modes ('level_feedback', 'level_passed', 'passed'), and it has no effect on 'expert_edits'.
+- `expert_edits`: an LLM proposes edits; prompts include the edit suggestions plus context.
+- `level_passed` / `passed`: binary outcome-oriented prompts with minimal context.
+- `plain`: no diagnostics, but still includes the previous response (unless disabled) and a "Revise ..." instruction.
+- `bandit`: returns the first-turn prompts every turn; it automatically enforces `external.original_prompt=true` and `external.previous_response=false` so that turn 1 and later turns receive the same prompt text.
+
+The setting specific to 'level_feedback' is `external.sandbox_slice`, which controls how many eval tests to include in the feedback. By default, the sandbox executes only the first assert (sandbox_slice=1). Use all eval tests by setting `external.sandbox_slice` to 0, None, or 'all'. Negative values use the last asserts. `external.sandbox_slice` only affects the analysis-based modes ('level_feedback', 'level_passed', 'passed') and has no effect on 'expert_edits' or 'bandit'.
 
 The setting specific to 'expert_edits' is `external.expert_edits_model`, which controls which LLM proposes the edits. By default, it uses DeepSeek-Coder; you can switch to Claude-3 or GPT-4 once the corresponding keys/tokens are set in your environment variables.
 
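As a quick illustration of the `external.sandbox_slice` semantics documented above, here is a minimal sketch; the helper name `select_asserts` and the list-of-asserts input are assumptions made for illustration, not code from this commit:

    # Hypothetical helper mirroring the documented sandbox_slice semantics;
    # select_asserts and its list-of-strings input are illustrative only.
    def select_asserts(asserts, sandbox_slice=1):
        if sandbox_slice in (0, None, "all"):
            return asserts                  # run every eval test
        if sandbox_slice < 0:
            return asserts[sandbox_slice:]  # negative: keep the last asserts
        return asserts[:sandbox_slice]      # default 1: only the first assert

For instance, select_asserts(tests, -1) would feed back only the last assert, while the default keeps only the first.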

external/__init__.py

Lines changed: 34 additions & 1 deletion

@@ -6,6 +6,7 @@
 from . import level_passed
 from . import passed
 from . import plain
+from . import bandit
 import builtins
 
 # Verbose toggle for external previews

@@ -246,7 +247,39 @@ def print(*args, **kwargs):  # type: ignore
         print("=" * 60 + "\n")
         return (aux_prompt, main_prompt) if int(num_agents) > 1 else [main_prompt]
 
-    supported = ["expert_edits", "level_feedback", "level_passed", "passed", "plain"]
+    if mode == "bandit":
+        # Enforce flags: original_prompt=True, previous_response=False
+        original_prompt_flag = True
+        previous_response_flag = False
+        if int(num_agents) == 1:
+            main_comp = agent_completions[0]
+            aux_comp = ""
+        else:
+            aux_comp, main_comp = agent_completions[0], agent_completions[1]
+        ctx = get_context(prompt) or {}
+        entry_point = ctx.get("entry_point", "")
+        test_code = ctx.get("tests_sandbox") or ctx.get("tests_eval", "")
+        aux_prompt, main_prompt = bandit.format_followup_prompts(
+            original_prompt=prompt,
+            aux_completion=aux_comp,
+            main_completion=main_comp,
+            test_code=test_code,
+            entry_point=entry_point,
+            original_prompt_flag=original_prompt_flag,
+            previous_response_flag=previous_response_flag,
+            num_agent=int(num_agents),
+        )
+        print("\n" + "=" * 60)
+        print("EXTERNAL MODE PREVIEW: bandit")
+        print("-" * 60)
+        if int(num_agents) > 1:
+            print("AUX PROMPT:\n" + aux_prompt)
+            print("-" * 60)
+        print("MAIN PROMPT:\n" + main_prompt)
+        print("=" * 60 + "\n")
+        return (aux_prompt, main_prompt) if int(num_agents) > 1 else [main_prompt]
+
+    supported = ["expert_edits", "level_feedback", "level_passed", "passed", "plain", "bandit"]
     raise NotImplementedError(
         f"External transition mode '{mode}' is not implemented yet. Supported: {', '.join(supported)}"
     )
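For readers skimming the diff: the bandit branch mirrors the return convention the other modes use, a (aux_prompt, main_prompt) tuple when num_agents > 1 and a one-element list otherwise. A minimal sketch of that convention, with the prompt construction stubbed out (build_prompts is a stand-in, not a function in this repo):

    # Illustrative stub of the dispatcher's return-shape convention;
    # build_prompts is a placeholder for the per-mode prompt builder.
    def dispatch_shape(num_agents, build_prompts):
        aux_prompt, main_prompt = build_prompts()
        if int(num_agents) > 1:
            return (aux_prompt, main_prompt)  # multi-agent: tuple of both prompts
        return [main_prompt]                  # single-agent: one-element list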

external/bandit.py

Lines changed: 29 additions & 0 deletions

@@ -0,0 +1,29 @@
+from typing import Tuple
+
+from .common import build_first_turn_prompts
+
+
+def format_followup_prompts(
+    original_prompt: str,
+    aux_completion: str,
+    main_completion: str,
+    test_code: str,
+    entry_point: str,
+    original_prompt_flag: bool = True,
+    previous_response_flag: bool = False,
+    num_agent: int = 2,
+) -> Tuple[str, str]:
+    """
+    Bandit mode: make follow-up prompts identical to the canonical first-turn
+    prompts. No analysis and no "Revise ..." instructions. Ignores completions.
+
+    Returns (aux_prompt, main_prompt). For single-agent, aux will be an empty string
+    and the caller should use only the main prompt.
+    """
+    # Build the canonical first-turn prompts (context + instructions)
+    aux_base, main_base = build_first_turn_prompts(original_prompt, entry_point)
+    if int(num_agent) == 1:
+        # For single-agent, aux prompt is unused; return an empty aux and main_base
+        return "", main_base
+    return aux_base, main_base
+
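A short usage sketch of the new module, assuming the package is importable as external and that build_first_turn_prompts exists in external/common.py as the diff implies; the task, completion, and test strings below are made up:

    from external import bandit

    # Completions and tests are accepted but ignored in bandit mode.
    aux, main = bandit.format_followup_prompts(
        original_prompt="Write a function add(a, b) that returns their sum.",
        aux_completion="",                                    # ignored
        main_completion="def add(a, b):\n    return a - b",   # ignored, even if wrong
        test_code="assert add(1, 2) == 3",                    # ignored
        entry_point="add",
        num_agent=2,
    )
    # aux and main are exactly the canonical first-turn prompts, so turn 2+
    # receives the same prompt text as turn 1.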
