Skip to content

Commit e9251ea

Browse files
committed
fix hardcode
1 parent 4eef38a commit e9251ea

File tree

8 files changed

+16
-0
lines changed

8 files changed

+16
-0
lines changed

configs/grpo_che_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ grpo:
37 37   learning_rate: 2.0e-5
38 38   logging_steps: 50
39 39   save_steps: 200
   40 + eval_interval: 4
   41 + eval_num_samples: 4
40 42   num_generations: 4
41 43   max_new_tokens: 256
42 44   joint_mode: aligned

configs/grpo_he_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ grpo:
37 37   learning_rate: 2.0e-5
38 38   logging_steps: 50
39 39   save_steps: 200
   40 + eval_interval: 4
   41 + eval_num_samples: 4
40 42   num_generations: 4
41 43   max_new_tokens: 256
42 44   joint_mode: aligned

configs/grpo_mbpp_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ grpo:
37 37   learning_rate: 3.0e-5
38 38   logging_steps: 50
39 39   save_steps: 200
   40 + eval_interval: 4
   41 + eval_num_samples: 4
40 42   num_generations: 4
41 43   max_new_tokens: 256
42 44   joint_mode: aligned

configs/magrpo_che_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ magrpo:
37 37   learning_rate: 2.0e-5
38 38   logging_steps: 50
39 39   save_steps: 200
   40 + eval_interval: 4
   41 + eval_num_samples: 4
40 42   num_generations: 4
41 43   max_new_tokens: 256
42 44   temperature: 0.8

configs/magrpo_he_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ magrpo:
37 37   learning_rate: 2.0e-5
38 38   logging_steps: 50
39 39   save_steps: 200
   40 + eval_interval: 4
   41 + eval_num_samples: 4
40 42   num_generations: 4
41 43   max_new_tokens: 256
42 44   joint_mode: aligned

configs/magrpo_mbpp_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ magrpo:
37 37   learning_rate: 3.0e-5
38 38   logging_steps: 50
39 39   save_steps: 200
   40 + eval_interval: 4
   41 + eval_num_samples: 4
40 42   num_generations: 4
41 43   max_new_tokens: 256
42 44   temperature: 0.8

train_grpo.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,8 @@ def _resolver(prompt: str):
357 357   learning_rate=grpo_config.get("learning_rate", 2e-5),
358 358   logging_steps=grpo_config.get("logging_steps", 50),
359 359   save_steps=grpo_config.get("save_steps", 200),
    360 + eval_interval=grpo_config.get("eval_interval", 4),
    361 + eval_num_samples=grpo_config.get("eval_num_samples", 4),
360 362   num_generations=grpo_config.get("num_generations", 4),
361 363   max_new_tokens=grpo_config.get("max_new_tokens", 256),
362 364   temperature=temperature,

train_magrpo.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -410,6 +410,8 @@ def _resolver(prompt: str):
410 410   learning_rate=magrpo_config.get("learning_rate", 2e-5),
411 411   logging_steps=magrpo_config.get("logging_steps", 50),
412 412   save_steps=magrpo_config.get("save_steps", 200),
    413 + eval_interval=magrpo_config.get("eval_interval", 4),
    414 + eval_num_samples=magrpo_config.get("eval_num_samples", 4),
413 415   num_generations=magrpo_config.get("num_generations", 4),
414 416   max_new_tokens=magrpo_config.get("max_new_tokens", 256),
415 417   temperature=temperature,

0 commit comments

Comments
 (0)