From 352ee338868bfab7e1495e87f2a129cacec89be3 Mon Sep 17 00:00:00 2001 From: Alessandro <17289614+0x0f0f0f@users.noreply.github.com> Date: Mon, 7 Jul 2025 19:14:51 +0200 Subject: [PATCH 1/4] update controller --- openevolve/controller.py | 77 +++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/openevolve/controller.py b/openevolve/controller.py index bf4ea683d..ae6b7df5e 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -72,7 +72,7 @@ class OpenEvolve: def __init__( self, - initial_program_path: str, + initial_programs_paths: List[str], evaluation_file: str, config_path: Optional[str] = None, config: Optional[Config] = None, @@ -86,9 +86,15 @@ def __init__( # Load from file or use defaults self.config = load_config(config_path) - # Set up output directory + # Assert that initial_programs_paths is a list, and not empty + if not initial_programs_paths: + raise ValueError("initial_programs_paths must be a non-empty list of file paths") + + # Set up output directory. + # If output_dir is specified, use it + # Otherwise, use the directory of the first initial program.
self.output_dir = output_dir or os.path.join( - os.path.dirname(initial_program_path), "openevolve_output" + os.path.dirname(initial_programs_paths[0]), "openevolve_output" ) os.makedirs(self.output_dir, exist_ok=True) @@ -122,13 +128,15 @@ def __init__( logger.debug(f"Generated LLM seed: {llm_seed}") # Load initial program - self.initial_program_path = initial_program_path - self.initial_program_code = self._load_initial_program() + self.initial_programs_paths = initial_programs_paths + self.initial_programs_code = self._load_initial_programs() + + # Assume all initial programs are in the same language if not self.config.language: - self.config.language = extract_code_language(self.initial_program_code) + self.config.language = extract_code_language(self.initial_programs_code[0]) # Extract file extension from initial program - self.file_extension = os.path.splitext(initial_program_path)[1] + self.file_extension = os.path.splitext(initial_programs_paths[0])[1] if not self.file_extension: # Default to .py if no extension found self.file_extension = ".py" @@ -136,6 +144,15 @@ def __init__( # Make sure it starts with a dot if not self.file_extension.startswith("."): self.file_extension = f".{self.file_extension}" + + # Check that all files have the same extension + for path in initial_programs_paths[1:]: + ext = os.path.splitext(path)[1] + if ext != self.file_extension: + raise ValueError( + f"All initial program files must have the same extension. 
" + f"Expected {self.file_extension}, but got {ext} for {path}" + ) # Initialize components self.llm_ensemble = LLMEnsemble(self.config.llm.models) @@ -160,7 +177,7 @@ def __init__( ) self.evaluation_file = evaluation_file - logger.info(f"Initialized OpenEvolve with {initial_program_path}") + logger.info(f"Initialized OpenEvolve with {initial_programs_paths}") # Initialize improved parallel processing components self.parallel_controller = None @@ -189,10 +206,13 @@ def _setup_logging(self) -> None: logger.info(f"Logging to {log_file}") - def _load_initial_program(self) -> str: - """Load the initial program from file""" - with open(self.initial_program_path, "r") as f: - return f.read() + def _load_initial_programs(self) -> str: + """Load the initial programs from file""" + programs = [] + for path in self.initial_programs_paths: + with open(path, "r") as f: + programs.append(f.read()) + return programs async def run( self, @@ -226,29 +246,28 @@ async def run( should_add_initial = ( start_iteration == 0 and len(self.database.programs) == 0 - and not any( - p.code == self.initial_program_code for p in self.database.programs.values() - ) ) if should_add_initial: - logger.info("Adding initial program to database") - initial_program_id = str(uuid.uuid4()) + logger.info("Adding initial programs to database") + for code in self.initial_programs_code: + initial_program_id = str(uuid.uuid4()) - # Evaluate the initial program - initial_metrics = await self.evaluator.evaluate_program( - self.initial_program_code, initial_program_id - ) + # Evaluate the initial program + initial_metrics = await self.evaluator.evaluate_program( + code, initial_program_id + ) - initial_program = Program( - id=initial_program_id, - code=self.initial_program_code, - language=self.config.language, - metrics=initial_metrics, - iteration_found=start_iteration, - ) + initial_program = Program( + id=initial_program_id, + code=code, + language=self.config.language, + metrics=initial_metrics, + 
iteration_found=start_iteration, + ) - self.database.add(initial_program) + # TODO. Should the island be incremented and reset here? + self.database.add(initial_program) else: logger.info( f"Skipping initial program addition (resuming from iteration {start_iteration} " From 286a70aa02d035856e45a2f5cfe2316cc9dc4278 Mon Sep 17 00:00:00 2001 From: Alessandro <17289614+0x0f0f0f@users.noreply.github.com> Date: Mon, 7 Jul 2025 19:15:07 +0200 Subject: [PATCH 2/4] change cli --- openevolve/cli.py | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/openevolve/cli.py b/openevolve/cli.py index dd5d707dd..384267643 100644 --- a/openevolve/cli.py +++ b/openevolve/cli.py @@ -19,7 +19,7 @@ def parse_args() -> argparse.Namespace: """Parse command-line arguments""" parser = argparse.ArgumentParser(description="OpenEvolve - Evolutionary coding agent") - parser.add_argument("initial_program", help="Path to the initial program file") + parser.add_argument("initial_program", help="Path to the initial program file", default=None) parser.add_argument( "evaluation_file", help="Path to the evaluation file containing an 'evaluate' function" @@ -57,6 +57,8 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--secondary-model", help="Secondary LLM model name", default=None) + parser.add_argument("--initial-programs-dir", help="Path to initial programs directory", default=None) + return parser.parse_args() @@ -69,9 +71,38 @@ async def main_async() -> int: """ args = parse_args() - # Check if files exist - if not os.path.exists(args.initial_program): - print(f"Error: Initial program file '{args.initial_program}' not found") + # Check if files exist. + # If args.initial_program is present, it should be a file. + # If args.initial_programs_dir is present, it should be a directory, and args.initial_program should be `None`. 
+ if args.initial_programs_dir: + if args.initial_program: + print("Error: Cannot specify both initial-program and --initial-programs-dir") + return 1 + if not os.path.isdir(args.initial_programs_dir): + print(f"Error: Initial programs path '{args.initial_programs_dir}' is not a directory") + return 1 + elif args.initial_program: + if args.initial_programs_dir: + print("Error: Cannot specify both --initial-programs-dir and initial_program") + return 1 + if not os.path.isfile(args.initial_program): + print(f"Error: Initial program file '{args.initial_program}' is not a file") + return 1 + + # Populate the initial_programs_paths vector. + initial_programs_paths = [] + if args.initial_programs_dir: + initial_programs_paths = [ + os.path.join(args.initial_programs_dir, f) + for f in os.listdir(args.initial_programs_dir) + if f.endswith(".py") + ] + elif args.initial_program: + initial_programs_paths = [args.initial_program] + + # Check that initial_programs_paths is not empty + if not initial_programs_paths: + print("Error: No initial programs found. 
Please provide a valid initial program or directory.") return 1 if not os.path.exists(args.evaluation_file): @@ -100,7 +131,7 @@ async def main_async() -> int: # Initialize OpenEvolve try: openevolve = OpenEvolve( - initial_program_path=args.initial_program, + initial_programs_paths=initial_programs_paths, evaluation_file=args.evaluation_file, config=config, config_path=args.config if config is None else None, From fb5e6544bca3c76b3cd536445a619afeb4940666 Mon Sep 17 00:00:00 2001 From: Alessandro <17289614+0x0f0f0f@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:32:44 +0200 Subject: [PATCH 3/4] update tests and controller --- README.md | 17 +++++++++++++++-- configs/README.md | 18 ++++++++++++------ openevolve/cli.py | 8 ++++++-- openevolve/controller.py | 10 ++++++++-- tests/test_checkpoint_resume.py | 10 +++++----- 5 files changed, 46 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index ab7f46ff5..563f5a5dc 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ An open-source implementation of the AlphaEvolve system described in the Google OpenEvolve is an evolutionary coding agent that uses Large Language Models to optimize code through an iterative process. It orchestrates a pipeline of LLM-based code generation, evaluation, and selection to continuously improve programs for a variety of tasks. 
Key features: + - Evolution of entire code files, not just single functions - Support for multiple programming languages - Supports OpenAI-compatible APIs for any LLM @@ -34,6 +35,7 @@ The controller orchestrates interactions between these components in an asynchro ### Installation To install natively, use: + ```bash git clone https://github.com/codelion/openevolve.git cd openevolve @@ -51,7 +53,7 @@ from openevolve import OpenEvolve # Initialize the system evolve = OpenEvolve( - initial_program_path="path/to/initial_program.py", + initial_programs_paths=["path/to/initial_program.py"], evaluation_file="path/to/evaluator.py", config_path="path/to/config.yaml" ) @@ -83,6 +85,7 @@ python openevolve-run.py path/to/initial_program.py path/to/evaluator.py \ ``` When resuming from a checkpoint: + - The system loads all previously evolved programs and their metrics - Checkpoint numbering continues from where it left off (e.g., if loaded from checkpoint_50, the next checkpoint will be checkpoint_60) - All evolution state is preserved (best programs, feature maps, archives, etc.) @@ -145,6 +148,7 @@ python scripts/visualizer.py --path examples/function_minimization/openevolve_ou ``` In the visualization UI, you can + - see the branching of your program evolution in a network visualization, with node radius chosen by the program fitness (= the currently selected metric), - see the parent-child relationship of nodes and click through them in the sidebar (use the yellow locator icon in the sidebar to center the node in the graph), - select the metric of interest (with the available metric choices depending on your data set), @@ -157,6 +161,7 @@ In the visualization UI, you can ### Docker You can also install and execute via Docker: + ```bash docker build -t openevolve . 
docker run --rm -v $(pwd):/app --network="host" openevolve examples/function_minimization/initial_program.py examples/function_minimization/evaluator.py --config examples/function_minimization/config.yaml --iterations 1000 @@ -179,6 +184,7 @@ database: ``` Sample configuration files are available in the `configs/` directory: + - `default_config.yaml`: Comprehensive configuration with all available options See the [Configuration Guide](configs/default_config.yaml) for a full list of options. @@ -205,18 +211,23 @@ return EvaluationResult( ``` The next generation prompt will include: + ```markdown ## Last Execution Output + ### Stderr + SyntaxError: invalid syntax (line 15) ### Traceback + ... ``` ## Example: LLM Feedback An example for an LLM artifact side channel is part of the default evaluation template, which ends with + ```markdown Return your evaluation as a JSON object with the following format: {{ @@ -226,6 +237,7 @@ Return your evaluation as a JSON object with the following format: "reasoning": "[brief explanation of scores]" }} ``` + The non-float values, in this case the "reasoning" key of the json response that the evaluator LLM generates, will be available within the next generation prompt. 
### Configuration @@ -239,7 +251,7 @@ evaluator: prompt: include_artifacts: true - max_artifact_bytes: 4096 # 4KB limit in prompts + max_artifact_bytes: 4096 # 4KB limit in prompts artifact_security_filter: true ``` @@ -266,6 +278,7 @@ A comprehensive example demonstrating OpenEvolve's application to symbolic regre [Explore the Symbolic Regression Example](examples/symbolic_regression/) Key features: + - Automatic generation of initial programs from benchmark tasks - Evolution from simple linear models to complex mathematical expressions - Evaluation on physics, chemistry, biology, and material science datasets diff --git a/configs/README.md b/configs/README.md index 6ce24383c..4fd43c4fa 100644 --- a/configs/README.md +++ b/configs/README.md @@ -5,7 +5,9 @@ This directory contains configuration files for OpenEvolve with examples for dif ## Configuration Files ### `default_config.yaml` + The main configuration file containing all available options with sensible defaults. This file includes: + - Complete documentation for all configuration parameters - Default values for all settings - **Island-based evolution parameters** for proper evolutionary diversity @@ -13,15 +15,19 @@ The main configuration file containing all available options with sensible defau Use this file as a template for your own configurations. ### `island_config_example.yaml` + A practical example configuration demonstrating proper island-based evolution setup. 
Shows: + - Recommended island settings for most use cases - Balanced migration parameters - Complete working configuration ### `island_examples.yaml` + Multiple example configurations for different scenarios: + - **Maximum Diversity**: Many islands, frequent migration -- **Focused Exploration**: Few islands, rare migration +- **Focused Exploration**: Few islands, rare migration - **Balanced Approach**: Default recommended settings - **Quick Exploration**: Small-scale rapid testing - **Large-Scale Evolution**: Complex optimization runs @@ -34,9 +40,9 @@ The key new parameters for proper evolutionary diversity are: ```yaml database: - num_islands: 5 # Number of separate populations - migration_interval: 50 # Migrate every N generations - migration_rate: 0.1 # Fraction of top programs to migrate + num_islands: 5 # Number of separate populations + migration_interval: 50 # Migrate every N generations + migration_rate: 0.1 # Fraction of top programs to migrate ``` ### Parameter Guidelines @@ -66,8 +72,8 @@ Then use with OpenEvolve: ```python from openevolve import OpenEvolve evolve = OpenEvolve( - initial_program_path="program.py", - evaluation_file="evaluator.py", + initial_programs_paths=["program.py"], + evaluation_file="evaluator.py", config_path="my_config.yaml" ) ``` diff --git a/openevolve/cli.py b/openevolve/cli.py index 384267643..7f9410d93 100644 --- a/openevolve/cli.py +++ b/openevolve/cli.py @@ -19,11 +19,15 @@ def parse_args() -> argparse.Namespace: """Parse command-line arguments""" parser = argparse.ArgumentParser(description="OpenEvolve - Evolutionary coding agent") - parser.add_argument("initial_program", help="Path to the initial program file", default=None) - parser.add_argument( "evaluation_file", help="Path to the evaluation file containing an 'evaluate' function" ) + parser.add_argument( + "initial_program", + nargs="?", + help="Path to the initial program file", + default=None, + ) parser.add_argument("--config", "-c", help="Path to configuration
file (YAML)", default=None) diff --git a/openevolve/controller.py b/openevolve/controller.py index ae6b7df5e..0db2fc3ac 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -250,7 +250,13 @@ async def run( if should_add_initial: logger.info("Adding initial programs to database") - for code in self.initial_programs_code: + + if len(self.initial_programs_code) > len(self.database.islands): + raise ValueError( + "Number of initial programs exceeds number of islands." + ) + + for i, code in enumerate(self.initial_programs_code): initial_program_id = str(uuid.uuid4()) # Evaluate the initial program @@ -267,7 +273,7 @@ async def run( ) # TODO. Should the island be incremented and reset here? - self.database.add(initial_program) + self.database.add(initial_program, 0, i) else: logger.info( f"Skipping initial program addition (resuming from iteration {start_iteration} " diff --git a/tests/test_checkpoint_resume.py b/tests/test_checkpoint_resume.py index 08baaf956..c2db03b7c 100644 --- a/tests/test_checkpoint_resume.py +++ b/tests/test_checkpoint_resume.py @@ -86,7 +86,7 @@ async def run_test(): mock_evaluator_class.return_value = mock_evaluator controller = OpenEvolve( - initial_program_path=self.test_program_path, + initial_programs_paths=[self.test_program_path], evaluation_file=self.evaluator_path, config=self.config, output_dir=self.test_dir, @@ -127,7 +127,7 @@ async def run_test(): mock_evaluator_class.return_value = mock_evaluator controller = OpenEvolve( - initial_program_path=self.test_program_path, + initial_programs_paths=[self.test_program_path], evaluation_file=self.evaluator_path, config=self.config, output_dir=self.test_dir, @@ -169,7 +169,7 @@ async def run_test(): mock_evaluator_class.return_value = mock_evaluator controller = OpenEvolve( - initial_program_path=self.test_program_path, + initial_programs_paths=[self.test_program_path], evaluation_file=self.evaluator_path, config=self.config, output_dir=self.test_dir, @@ -219,7 +219,7 @@ 
async def run_test(): mock_evaluator_class.return_value = mock_evaluator controller = OpenEvolve( - initial_program_path=self.test_program_path, + initial_programs_paths=[self.test_program_path], evaluation_file=self.evaluator_path, config=self.config, output_dir=self.test_dir, @@ -269,7 +269,7 @@ async def run_test(): mock_evaluator_class.return_value = mock_evaluator controller = OpenEvolve( - initial_program_path=self.test_program_path, + initial_programs_paths=[self.test_program_path], evaluation_file=self.evaluator_path, config=self.config, output_dir=self.test_dir, From 529a06eb336a1825aa987b2de3473ccd5b8a0504 Mon Sep 17 00:00:00 2001 From: Alessandro <17289614+0x0f0f0f@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:42:48 +0200 Subject: [PATCH 4/4] update cli --- openevolve/cli.py | 46 +++++++++------------------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/openevolve/cli.py b/openevolve/cli.py index 7f9410d93..cb10daf80 100644 --- a/openevolve/cli.py +++ b/openevolve/cli.py @@ -22,10 +22,11 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "evaluation_file", help="Path to the evaluation file containing an 'evaluate' function" ) + parser.add_argument( - "initial_program", - nargs="?", - help="Path to the initial program file", + "initial_programs", + nargs="+", + help="Path(s) to one or more initial program files", default=None, ) @@ -61,8 +62,6 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--secondary-model", help="Secondary LLM model name", default=None) - parser.add_argument("--initial-programs-dir", help="Path to initial programs directory", default=None) - return parser.parse_args() @@ -76,39 +75,12 @@ async def main_async() -> int: args = parse_args() # Check if files exist. - # If args.initial_program is present, it should be a file. - # If args.initial_programs_dir is present, it should be a directory, and args.initial_program should be `None`. 
- if args.initial_programs_dir: - if args.initial_program: - print("Error: Cannot specify both initial-program and --initial-programs-dir") - return 1 - if not os.path.isdir(args.initial_programs_dir): - print(f"Error: Initial programs path '{args.initial_programs_dir}' is not a directory") - return 1 - elif args.initial_program: - if args.initial_programs_dir: - print("Error: Cannot specify both --initial-programs-dir and initial_program") - return 1 - if not os.path.isfile(args.initial_program): - print(f"Error: Initial program file '{args.initial_program}' is not a file") + + for program in args.initial_programs: + if not os.path.isfile(program): + print(f"Error: Initial program file '{program}' does not exist") return 1 - # Populate the initial_programs_paths vector. - initial_programs_paths = [] - if args.initial_programs_dir: - initial_programs_paths = [ - os.path.join(args.initial_programs_dir, f) - for f in os.listdir(args.initial_programs_dir) - if f.endswith(".py") - ] - elif args.initial_program: - initial_programs_paths = [args.initial_program] - - # Check that initial_programs_paths is not empty - if not initial_programs_paths: - print("Error: No initial programs found. Please provide a valid initial program or directory.") - return 1 - if not os.path.exists(args.evaluation_file): print(f"Error: Evaluation file '{args.evaluation_file}' not found") return 1 @@ -135,7 +107,7 @@ async def main_async() -> int: # Initialize OpenEvolve try: openevolve = OpenEvolve( - initial_programs_paths=initial_programs_paths, + initial_programs_paths=args.initial_programs, evaluation_file=args.evaluation_file, config=config, config_path=args.config if config is None else None,