Commit 33e4e7d

Merge branch 'main' into feat/MLX-kernel-optimization

2 parents d88aebb + 166f77f commit 33e4e7d

File tree: 9 files changed, +274 −153 lines changed

examples/function_minimization/evaluator.py

Lines changed: 42 additions & 37 deletions

@@ -5,8 +5,9 @@
 import importlib.util
 import numpy as np
 import time
-import multiprocessing
+import concurrent.futures
 import traceback
+import signal


 def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
@@ -22,31 +23,13 @@ def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
     Returns:
         Result of the function or raises TimeoutError
     """
-
-    def wrapper(queue, func, args, kwargs):
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+        future = executor.submit(func, *args, **kwargs)
         try:
-            result = func(*args, **kwargs)
-            queue.put(("success", result))
-        except Exception as e:
-            queue.put(("error", e))
-
-    queue = multiprocessing.Queue()
-    process = multiprocessing.Process(target=wrapper, args=(queue, func, args, kwargs))
-    process.start()
-    process.join(timeout=timeout_seconds)
-
-    if process.is_alive():
-        process.terminate()
-        process.join()
-        raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")
-
-    if queue.empty():
-        raise TimeoutError("Function ended without returning a result")
-
-    status, result = queue.get()
-    if status == "error":
-        raise result
-    return result
+            result = future.result(timeout=timeout_seconds)
+            return result
+        except concurrent.futures.TimeoutError:
+            raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")


 def safe_float(value):
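
The rewritten helper trades process isolation for a worker thread. A minimal usage sketch (the slow_search stub is illustrative, not part of the repository): future.result(timeout=...) raises concurrent.futures.TimeoutError, which is re-raised as a plain TimeoutError. Note that, unlike the old process.terminate() path, the worker thread is not killed on timeout, and leaving the with block waits for it to finish before the exception reaches the caller.

import concurrent.futures
import time


def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
    # Same shape as the new helper above: submit the call to a single worker
    # thread and bound the wait, not the work itself.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(func, *args, **kwargs)
        try:
            return future.result(timeout=timeout_seconds)
        except concurrent.futures.TimeoutError:
            raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")


def slow_search():
    # Illustrative stand-in for a long-running run_search()
    time.sleep(10)
    return 0.0, 0.0, 0.0


try:
    run_with_timeout(slow_search, timeout_seconds=1)
except TimeoutError as exc:
    # Printed only after slow_search finishes, because executor shutdown waits
    print(exc)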
@@ -107,15 +90,27 @@ def evaluate(program_path):
             # Run with timeout
             result = run_with_timeout(program.run_search, timeout_seconds=5)

-            # Check if we got a tuple of 3 values
-            if not isinstance(result, tuple) or len(result) != 3:
+            # Handle different result formats
+            if isinstance(result, tuple):
+                if len(result) == 3:
+                    x, y, value = result
+                elif len(result) == 2:
+                    # Assume it's (x, y) and calculate value
+                    x, y = result
+                    # Calculate the function value since it wasn't returned
+                    value = np.sin(x) * np.cos(y) + np.sin(x * y) + (x**2 + y**2) / 20
+                    print(f"Trial {trial}: Got 2 values, calculated function value: {value}")
+                else:
+                    print(
+                        f"Trial {trial}: Invalid result format, expected tuple of 2 or 3 values but got {len(result)}"
+                    )
+                    continue
+            else:
                 print(
-                    f"Trial {trial}: Invalid result format, expected tuple of 3 values but got {type(result)}"
+                    f"Trial {trial}: Invalid result format, expected tuple but got {type(result)}"
                 )
                 continue

-            x, y, value = result
-
             end_time = time.time()

             # Ensure all values are float
@@ -264,15 +259,25 @@ def evaluate_stage1(program_path):
         # Run a single trial with timeout
         result = run_with_timeout(program.run_search, timeout_seconds=5)

-        # Check if we got a tuple of 3 values
-        if not isinstance(result, tuple) or len(result) != 3:
-            print(
-                f"Stage 1: Invalid result format, expected tuple of 3 values but got {type(result)}"
-            )
+        # Handle different result formats
+        if isinstance(result, tuple):
+            if len(result) == 3:
+                x, y, value = result
+            elif len(result) == 2:
+                # Assume it's (x, y) and calculate value
+                x, y = result
+                # Calculate the function value since it wasn't returned
+                value = np.sin(x) * np.cos(y) + np.sin(x * y) + (x**2 + y**2) / 20
+                print(f"Stage 1: Got 2 values, calculated function value: {value}")
+            else:
+                print(
+                    f"Stage 1: Invalid result format, expected tuple of 2 or 3 values but got {len(result)}"
+                )
+                return {"runs_successfully": 0.0, "error": "Invalid result format"}
+        else:
+            print(f"Stage 1: Invalid result format, expected tuple but got {type(result)}")
             return {"runs_successfully": 0.0, "error": "Invalid result format"}

-        x, y, value = result
-
         # Ensure all values are float
         x = safe_float(x)
         y = safe_float(y)
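
The new 2-tuple branch recomputes the objective from the formula written in the diff above. A small self-contained check (the coordinates are arbitrary example values, not from the repository):

import numpy as np


def objective(x, y):
    # Function being minimized, as written in the fallback branch above
    return np.sin(x) * np.cos(y) + np.sin(x * y) + (x**2 + y**2) / 20


x, y = 1.0, -2.0  # illustrative coordinates only
value = objective(x, y)
print(f"Calculated function value: {value:.4f}")  # approximately -1.0095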
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+scipy

openevolve/controller.py

Lines changed: 10 additions & 23 deletions

@@ -24,6 +24,10 @@
     parse_evolve_blocks,
     parse_full_rewrite,
 )
+from openevolve.utils.format_utils import (
+    format_metrics_safe,
+    format_improvement_safe,
+)

 logger = logging.getLogger(__name__)

@@ -332,7 +336,7 @@ async def run(
                 logger.info(
                     f"🌟 New best solution found at iteration {i+1}: {child_program.id}"
                 )
-                logger.info(f"Metrics: {_format_metrics(child_program.metrics)}")
+                logger.info(f"Metrics: {format_metrics_safe(child_program.metrics)}")

             # Save checkpoint
             if (i + 1) % self.config.checkpoint_interval == 0:
@@ -396,7 +400,7 @@ async def run(
         if best_program:
             logger.info(
                 f"Evolution complete. Best program has metrics: "
-                f"{_format_metrics(best_program.metrics)}"
+                f"{format_metrics_safe(best_program.metrics)}"
             )

             # Save the best program (using our tracked best program)
@@ -424,30 +428,13 @@ def _log_iteration(
            child: Child program
            elapsed_time: Elapsed time in seconds
        """
-        # Calculate improvement
-        improvement = {}
-        for metric, value in child.metrics.items():
-            if metric in parent.metrics:
-                # Only calculate diff for numeric values
-                if (
-                    isinstance(value, (int, float))
-                    and isinstance(parent.metrics[metric], (int, float))
-                    and not isinstance(value, bool)
-                    and not isinstance(parent.metrics[metric], bool)
-                ):
-                    try:
-                        diff = value - parent.metrics[metric]
-                        improvement[metric] = diff
-                    except (TypeError, ValueError):
-                        # Skip non-numeric metrics
-                        pass
-
-        improvement_str = _format_improvement(improvement)
+        # Calculate improvement using safe formatting
+        improvement_str = format_improvement_safe(parent.metrics, child.metrics)

        logger.info(
            f"Iteration {iteration+1}: Child {child.id} from parent {parent.id} "
            f"in {elapsed_time:.2f}s. Metrics: "
-            f"{_format_metrics(child.metrics)} "
+            f"{format_metrics_safe(child.metrics)} "
            f"(Δ: {improvement_str})"
        )

@@ -503,7 +490,7 @@ def _save_checkpoint(self, iteration: int) -> None:

            logger.info(
                f"Saved best program at checkpoint {iteration} with metrics: "
-                f"{_format_metrics(best_program.metrics)}"
+                f"{format_metrics_safe(best_program.metrics)}"
            )

        logger.info(f"Saved checkpoint at iteration {iteration} to {checkpoint_path}")

openevolve/database.py

Lines changed: 10 additions & 13 deletions

@@ -15,6 +15,7 @@

 from openevolve.config import DatabaseConfig
 from openevolve.utils.code_utils import calculate_edit_distance
+from openevolve.utils.metrics_utils import safe_numeric_average

 logger = logging.getLogger(__name__)

@@ -244,7 +245,7 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]:
         # Sort by average of all numeric metrics as fallback
         sorted_programs = sorted(
             self.programs.values(),
-            key=lambda p: _safe_avg_metrics(p.metrics),
+            key=lambda p: safe_numeric_average(p.metrics),
             reverse=True,
         )
         if sorted_programs:
@@ -298,7 +299,7 @@ def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Pr
         # Sort by average of all numeric metrics
         sorted_programs = sorted(
             self.programs.values(),
-            key=lambda p: _safe_avg_metrics(p.metrics),
+            key=lambda p: safe_numeric_average(p.metrics),
             reverse=True,
         )

@@ -543,7 +544,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
                 if not program.metrics:
                     bin_idx = 0
                 else:
-                    avg_score = _safe_avg_metrics(program.metrics)
+                    avg_score = safe_numeric_average(program.metrics)
                     bin_idx = min(int(avg_score * self.feature_bins), self.feature_bins - 1)
                 coords.append(bin_idx)
             elif dim in program.metrics:
@@ -595,8 +596,8 @@ def _is_better(self, program1: Program, program2: Program) -> bool:
             return program1.metrics["combined_score"] > program2.metrics["combined_score"]

         # Fallback to average of all numeric metrics
-        avg1 = _safe_avg_metrics(program1.metrics)
-        avg2 = _safe_avg_metrics(program2.metrics)
+        avg1 = safe_numeric_average(program1.metrics)
+        avg2 = safe_numeric_average(program2.metrics)

         return avg1 > avg2

@@ -614,7 +615,7 @@ def _update_archive(self, program: Program) -> None:

         # Otherwise, find worst program in archive
         archive_programs = [self.programs[pid] for pid in self.archive]
-        worst_program = min(archive_programs, key=lambda p: _safe_avg_metrics(p.metrics))
+        worst_program = min(archive_programs, key=lambda p: safe_numeric_average(p.metrics))

         # Replace if new program is better
         if self._is_better(program, worst_program):
@@ -821,7 +822,7 @@ def _enforce_population_limit(self) -> None:
         # Sort by average metric (worst first)
         sorted_programs = sorted(
             all_programs,
-            key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) if p.metrics else 0.0,
+            key=lambda p: safe_numeric_average(p.metrics),
         )

         # Remove worst programs, but never remove the best program
@@ -916,9 +917,7 @@ def migrate_programs(self) -> None:

             # Sort by fitness (using combined_score or average metrics)
             island_programs.sort(
-                key=lambda p: p.metrics.get(
-                    "combined_score", sum(p.metrics.values()) / max(1, len(p.metrics))
-                ),
+                key=lambda p: p.metrics.get("combined_score", safe_numeric_average(p.metrics)),
                 reverse=True,
             )

@@ -963,9 +962,7 @@ def get_island_stats(self) -> List[dict]:

             if island_programs:
                 scores = [
-                    p.metrics.get(
-                        "combined_score", sum(p.metrics.values()) / max(1, len(p.metrics))
-                    )
+                    p.metrics.get("combined_score", safe_numeric_average(p.metrics))
                     for p in island_programs
                 ]
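
safe_numeric_average comes from openevolve/utils/metrics_utils.py, which this diff does not show. A hypothetical sketch consistent with the inline expression it replaces, sum(p.metrics.values()) / max(1, len(p.metrics)), but restricted to numeric, non-boolean values so mixed metric dicts no longer raise:

from typing import Any, Dict


def safe_numeric_average(metrics: Dict[str, Any]) -> float:
    # Average only numeric, non-boolean metric values; 0.0 when none exist
    numeric = [
        v for v in metrics.values()
        if isinstance(v, (int, float)) and not isinstance(v, bool)
    ]
    return sum(numeric) / len(numeric) if numeric else 0.0


# The old inline expression would raise a TypeError on this dict:
print(safe_numeric_average({"score": 0.8, "error": "timeout", "valid": True}))  # 0.8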

openevolve/evaluator.py

Lines changed: 4 additions & 18 deletions

@@ -18,6 +18,7 @@
 from openevolve.config import EvaluatorConfig
 from openevolve.llm.ensemble import LLMEnsemble
 from openevolve.utils.async_utils import TaskPool, run_in_executor
+from openevolve.utils.format_utils import format_metrics_safe

 logger = logging.getLogger(__name__)

@@ -123,24 +124,9 @@ async def evaluate_program(
                 metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight

         elapsed = time.time() - start_time
-
-        # Safe formatting of metrics to prevent formatting errors
-        def safe_format_metric_value(value):
-            """Safely format a metric value for logging."""
-            try:
-                if isinstance(value, (int, float)) and not isinstance(value, bool):
-                    import math
-
-                    if math.isnan(value) or math.isinf(value):
-                        return str(value)
-                    return f"{value:.4f}"
-                else:
-                    return str(value)
-            except (ValueError, TypeError):
-                return str(value)
-
-        metrics_str = ", ".join(
-            f"{name}={safe_format_metric_value(value)}" for name, value in metrics.items()
+        logger.info(
+            f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
+            f"{format_metrics_safe(metrics)}"
         )

         logger.info(f"Evaluated program{program_id_str} in {elapsed:.2f}s: {metrics_str}")

0 commit comments
