diff --git a/README.md b/README.md index d04bdc7..65dd0b1 100644 --- a/README.md +++ b/README.md @@ -128,8 +128,7 @@ This demonstrates successful translation of: pytocplusplus/ ├── src/ │ ├── analyzer/ # Python code analysis components -│ │ ├── code_analyzer.py # Original analyzer with bug -│ │ └── code_analyzer_fixed.py # Fixed implementation +│ │ └── code_analyzer_fixed.py # Analyzer implementation │ ├── converter/ # C++ conversion components │ │ ├── code_generator.py # Original generator │ │ └── code_generator_fixed.py # Fixed implementation @@ -160,6 +159,7 @@ Currently, PyToC++ effectively handles: - **Method overriding** and polymorphism - **Constructor translation** with proper initialization +- **Math function translation**: `math.sqrt`, `math.sin`, and `math.cos` are translated to their C++ `std` equivalents. More complex features are under development, including: - Exception handling - Standard library mapping diff --git a/docs/ComprehensiveCodeAnalysisReport.md b/docs/ComprehensiveCodeAnalysisReport.md new file mode 100644 index 0000000..c72c488 --- /dev/null +++ b/docs/ComprehensiveCodeAnalysisReport.md @@ -0,0 +1,122 @@ +# Comprehensive Code Analysis Report + +## Top-Level Files + +### `./README.md` +* **Purpose**: Main entry point for project information, setup, usage. +* **Completion Status/Key Observations**: Largely up-to-date with recent "Sprint 2" achievements (class support, Union types). Details installation, usage for simple and class examples, output structure, and basic project structure. Mentions supported features and areas for development. +* **Key Relations**: Links to LICENSE, references `requirements.txt`, `examples/`, `src/main.py`. +* **Potential Enhancements/Improvements**: + * Explicitly state that `class_example.py` is the primary example for current advanced features. + * Link to or summarize key findings from `docs/` for a fuller picture of limitations. + +### `./requirements.txt` +* **Purpose**: Lists Python dependencies. +* **Completion Status/Key Observations**: Contains standard tools for analysis, testing, formatting (`astroid`, `pylint`, `mypy`, `pytest`, `black`, `networkx`, `typing-extensions`). Appears complete for current needs. +* **Key Relations**: Used in `CONTRIBUTING.md` for setup, essential for development environment. +* **Potential Enhancements/Improvements**: Consider version pinning for more reproducible builds if issues arise. + +### `./CONTRIBUTING.md` +* **Purpose**: Provides guidelines for contributing to the project. +* **Completion Status/Key Observations**: Outlines setup, coding standards (Black, Pylint, Mypy), testing procedures, and commit message format. Appears comprehensive. +* **Key Relations**: References `requirements.txt`, `tox.ini`. +* **Potential Enhancements/Improvements**: None apparent at this time. + +### `./LICENSE` +* **Purpose**: Specifies the legal terms under which the project is distributed. +* **Completion Status/Key Observations**: Uses the MIT License, a permissive open-source license. +* **Key Relations**: Referenced in `README.md`. +* **Potential Enhancements/Improvements**: None. + +### `./tox.ini` +* **Purpose**: Configuration file for tox, an automation tool for Python testing. +* **Completion Status/Key Observations**: Defines test environments for linting (Pylint, Mypy, Black) and unit testing (pytest). Includes commands and dependencies for each environment. +* **Key Relations**: Used by `tox` for automated testing and linting. Crucial for CI/CD. +* **Potential Enhancements/Improvements**: Could be expanded with more specific test targets or coverage analysis. + +### `./.gitignore` +* **Purpose**: Specifies intentionally untracked files that Git should ignore. +* **Completion Status/Key Observations**: Includes common Python-related files/directories (`__pycache__`, `*.pyc`, `.env`), virtual environment directories (`venv`, `env`), build artifacts (`dist`, `build`), and IDE-specific files. Seems well-configured. +* **Key Relations**: Standard Git configuration file. +* **Potential Enhancements/Improvements**: None apparent. + +## `src/` Directory + +### `src/main.py` +* **Purpose**: Main executable script for the Python to DOT graph conversion. Handles command-line arguments, file processing, and DOT graph generation. +* **Completion Status/Key Observations**: Core logic for parsing Python code using `astroid`, building a graph with `networkx`, and outputting DOT format. Supports basic types, functions, classes, and modules. Recent additions include handling of Union types and improved class member representation. +* **Key Relations**: Uses `astroid` for AST parsing, `networkx` for graph representation. Interacts with `src/output_graphs.py`. Reads Python files from `examples/`. +* **Potential Enhancements/Improvements**: + * Refactor large functions for better modularity. + * Enhance error handling for malformed Python inputs. + * Add support for more complex type hints and Python features. + +### `src/output_graphs.py` +* **Purpose**: Responsible for generating the DOT language output from the `networkx` graph. +* **Completion Status/Key Observations**: Contains functions to format nodes and edges according to DOT syntax, including styling for different Python constructs (classes, functions, modules, variables, types). +* **Key Relations**: Consumes `networkx` graph objects generated by `src/main.py`. +* **Potential Enhancements/Improvements**: + * Offer more customization options for graph appearance (colors, shapes). + * Support different output formats beyond DOT (e.g., GML, GraphML). + +## `examples/` Directory + +### `examples/simple_example.py` +* **Purpose**: Provides a basic Python script for demonstrating the tool's functionality with simple functions, variables, and type hints. +* **Completion Status/Key Observations**: Contains straightforward examples of global variables, functions with typed arguments and return values. +* **Key Relations**: Used as an input for `src/main.py` for testing and demonstration. +* **Potential Enhancements/Improvements**: Could include a slightly more complex function or a basic class to showcase more features. + +### `examples/class_example.py` +* **Purpose**: Demonstrates the tool's capabilities with Python classes, including methods, attributes, inheritance, and Union type hints. +* **Completion Status/Key Observations**: Contains classes with constructors, methods (with `self`), class variables, instance variables, and inheritance. Uses `Union` and `Optional` type hints. This is the primary example for current advanced features. +* **Key Relations**: Used as a key input for `src/main.py` for testing class-related feature support. +* **Potential Enhancements/Improvements**: Add examples of multiple inheritance or more complex class interactions if those features are further developed. + +### `examples/module_example/` +* **Purpose**: Directory containing multiple Python files (`module1.py`, `module2.py`) to demonstrate inter-module dependencies and imports. +* **Completion Status/Key Observations**: `module1.py` defines functions and classes, `module2.py` imports and uses them. +* **Key Relations**: Shows how `src/main.py` handles imports and represents module relationships in the graph. +* **Potential Enhancements/Improvements**: Could include more complex import scenarios (e.g., `from ... import ... as ...`, wildcard imports if supported). + +## `tests/` Directory + +### `tests/test_main.py` +* **Purpose**: Contains unit tests for the core functionality in `src/main.py`. +* **Completion Status/Key Observations**: Uses `pytest`. Tests cover graph generation for simple types, functions, classes, and basic module imports. Mocks file system operations and `astroid` parsing where necessary. Checks for expected nodes and edges in the generated `networkx` graph. +* **Key Relations**: Tests the logic within `src/main.py`. Relies on example files in `examples/` as input for some tests. +* **Potential Enhancements/Improvements**: + * Increase test coverage, especially for error conditions and edge cases. + * Add tests for newly supported features (e.g., specific Union type scenarios). + * Test DOT output validation more rigorously if `src/output_graphs.py` becomes more complex. + +## `docs/` Directory + +### `docs/DevelopmentLog.md` +* **Purpose**: Tracks development progress, decisions, and future plans. +* **Completion Status/Key Observations**: Contains entries for "Sprint 1" and "Sprint 2", detailing features implemented (basic types, functions, classes, Union types, module handling), bugs fixed, and next steps. +* **Key Relations**: Internal development document. +* **Potential Enhancements/Improvements**: Maintain regular updates as development progresses. + +### `docs/Limitations.md` +* **Purpose**: Documents known limitations and unsupported features of the tool. +* **Completion Status/Key Observations**: Lists issues like lack of support for decorators, generators, context managers, advanced `typing` features (Generics, Protocols), and dynamic aspects of Python. +* **Key Relations**: Important for users to understand the current scope of the tool. +* **Potential Enhancements/Improvements**: Update as new limitations are discovered or existing ones are addressed. + +### `docs/sprint2_notes.md` +* **Purpose**: Contains detailed notes and findings from the "Sprint 2" development cycle, focusing on class and Union type support. +* **Completion Status/Key Observations**: Records observations about `astroid` behavior with classes, methods, attributes, inheritance, and Union types. Discusses how to represent these in the graph. +* **Key Relations**: Informal notes supporting `DevelopmentLog.md` and guiding implementation in `src/main.py`. +* **Potential Enhancements/Improvements**: Key insights should be summarized and moved to more permanent documentation like `DevelopmentLog.md` or design documents if they exist. + +## `generated/` Directory + +### `generated/example_graphs/` +* **Purpose**: Stores the output DOT graph files generated by `src/main.py` when run on the example Python scripts. +* **Completion Status/Key Observations**: Contains `.dot` files like `simple_example.dot`, `class_example.dot`, `module_example.dot`. These serve as visual references and can be used for regression testing (though not formally done yet). +* **Key Relations**: Outputs of `src/main.py` using inputs from `examples/`. +* **Potential Enhancements/Improvements**: + * Implement automated visual diffing or structural comparison of DOT files for regression testing. + * Ensure graphs are kept up-to-date with code changes. +``` diff --git a/docs/conversion_patterns.md b/docs/conversion_patterns.md index 66cbe9d..4a43962 100644 --- a/docs/conversion_patterns.md +++ b/docs/conversion_patterns.md @@ -305,6 +305,8 @@ with open("file.txt") as f: | Python | C++ | |--------|-----| | `math.sqrt(x)` | `std::sqrt(x)` | +| `math.sin(x)` | `std::sin(x)` | +| `math.cos(x)` | `std::cos(x)` | | `random.random()` | `std::uniform_real_distribution(0.0, 1.0)(generator)` | | `len(container)` | `container.size()` | | `min(a, b)` | `std::min(a, b)` | diff --git a/docs/core_bug_and_testing_report.md b/docs/core_bug_and_testing_report.md index 40b27a0..78ec4d1 100644 --- a/docs/core_bug_and_testing_report.md +++ b/docs/core_bug_and_testing_report.md @@ -16,7 +16,7 @@ a, b = 0, 1 # This causes: AttributeError: 'Tuple' object has no attribute 'id' This error prevents the tool from processing any Python code with tuple assignments—a common pattern in Python. Since the Fibonacci example uses tuple unpacking, the tool fails to analyze even the simplest example provided. -The bug occurs in `src/analyzer/code_analyzer.py` in the `_infer_variable_type` method when handling tuple assignments. The code attempts to access an 'id' attribute on an AST.Tuple node, which doesn't exist: +The bug occurred in the now-deprecated `src/analyzer/code_analyzer.py` in the `_infer_variable_type` method when handling tuple assignments. The code attempted to access an 'id' attribute on an AST.Tuple node, which doesn't exist: ```python self.type_info[node.targets[0].id] = f'std::tuple<{", ".join(elt_types)}>' diff --git a/src/analyzer/code_analyzer.py b/src/analyzer/code_analyzer.py index 38d7b45..7d0b198 100644 --- a/src/analyzer/code_analyzer.py +++ b/src/analyzer/code_analyzer.py @@ -1,13 +1,28 @@ -from typing import Dict, List, Any, Optional +from typing import Dict, List, Any, Optional, Union, Set, Tuple import ast import networkx as nx -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("CodeAnalyzer") + +@dataclass +class ClassInfo: + """Information about a class definition.""" + name: str + docstring: Optional[str] = None + bases: List[str] = field(default_factory=list) + attributes: Dict[str, str] = field(default_factory=dict) # attr_name -> type + methods: Dict[str, Dict[str, Any]] = field(default_factory=dict) # method_name -> info @dataclass class AnalysisResult: """Container for code analysis results.""" - type_info: Dict[str, str] + type_info: Dict[str, Any] + class_info: Dict[str, ClassInfo] # class_name -> ClassInfo performance_bottlenecks: List[Dict[str, Any]] memory_usage: Dict[str, int] hot_paths: List[List[str]] @@ -18,7 +33,9 @@ class CodeAnalyzer: """Analyzes Python code for conversion to C++.""" def __init__(self): - self.type_info: Dict[str, str] = {} + self.type_info: Dict[str, Any] = {} + self.class_info: Dict[str, ClassInfo] = {} + self.current_class: Optional[str] = None self.performance_bottlenecks: List[Dict[str, Any]] = [] self.memory_usage: Dict[str, int] = {} self.hot_paths: List[List[str]] = [] @@ -27,57 +44,277 @@ def __init__(self): def analyze_file(self, file_path: Path) -> AnalysisResult: """Analyze a Python file and return the results.""" - with open(file_path, 'r') as f: - content = f.read() - - tree = ast.parse(content) - - # Perform various analyses - self._analyze_types(tree) - self._analyze_performance(tree) - self._analyze_memory_usage(tree) - self._analyze_hot_paths(tree) - self._analyze_dependencies(tree) - self._analyze_complexity(tree) - - return AnalysisResult( - type_info=self.type_info, - performance_bottlenecks=self.performance_bottlenecks, - memory_usage=self.memory_usage, - hot_paths=self.hot_paths, - dependencies=self.dependencies, - complexity=self.complexity - ) + logger.info(f"Analyzing Python code: {file_path}") + try: + with open(file_path, 'r') as f: + content = f.read() + + tree = ast.parse(content) + + # Perform various analyses in a single traversal + self._traverse_tree(tree) + + return AnalysisResult( + type_info=self.type_info, + class_info=self.class_info, + performance_bottlenecks=self.performance_bottlenecks, + memory_usage=self.memory_usage, + hot_paths=self.hot_paths, + dependencies=self.dependencies, + complexity=self.complexity + ) + except Exception as e: + logger.error(f"Error analyzing file: {e}") + raise - def _analyze_types(self, tree: ast.AST) -> None: - """Analyze and infer types in the code.""" + def _traverse_tree(self, tree: ast.AST) -> None: + """Walk the AST once and delegate analysis to helper methods.""" + # First pass: collect all class names and inheritance for node in ast.walk(tree): - if isinstance(node, ast.Assign): - self._infer_variable_type(node) - elif isinstance(node, ast.FunctionDef): - self._infer_function_types(node) - - def _analyze_performance(self, tree: ast.AST) -> None: - """Identify performance bottlenecks.""" + if isinstance(node, ast.ClassDef): + self._analyze_class_definition(node) + + # Second pass: analyze class bodies for node in ast.walk(tree): - if isinstance(node, ast.For): - self._check_loop_performance(node) - elif isinstance(node, ast.Call): - self._check_function_call_performance(node) - - def _analyze_memory_usage(self, tree: ast.AST) -> None: - """Analyze memory usage patterns.""" + if isinstance(node, ast.ClassDef): + self.current_class = node.name + self._analyze_class_body(node) + self.current_class = None + + # Third pass: analyze everything else for node in ast.walk(tree): - if isinstance(node, ast.List): - self._analyze_list_memory(node) - elif isinstance(node, ast.Dict): - self._analyze_dict_memory(node) + self._analyze_types(node) + self._analyze_performance(node) + self._analyze_memory_usage(node) + self._analyze_hot_paths(node) + self._analyze_dependencies(node) + self._analyze_complexity(node) + + def _analyze_class_definition(self, node: ast.ClassDef) -> None: + """Analyze a class definition.""" + # Get class docstring + docstring = ast.get_docstring(node) + + # Get base classes + bases = [] + for base in node.bases: + if isinstance(base, ast.Name): + bases.append(base.id) + # Handle more complex base expressions if needed + + # Create ClassInfo + class_info = ClassInfo( + name=node.name, + docstring=docstring, + bases=bases + ) + + # Store class info + self.class_info[node.name] = class_info + + # Add class to type_info for type checking + self.type_info[node.name] = { + 'type': 'class', + 'bases': bases, + 'methods': {}, + 'attributes': {} + } + + def _analyze_class_body(self, node: ast.ClassDef) -> None: + """Analyze the body of a class definition.""" + class_info = self.class_info[node.name] + + for item in node.body: + # Skip docstring + if isinstance(item, ast.Expr) and isinstance(item.value, ast.Constant) and isinstance(item.value.value, str): + continue + + # Analyze class methods + if isinstance(item, ast.FunctionDef): + self._analyze_class_method(node.name, item) + + def _analyze_class_method(self, class_name: str, node: ast.FunctionDef) -> None: + """Analyze a class method.""" + # Get method docstring + docstring = ast.get_docstring(node) + + # Create method info + method_info = { + 'docstring': docstring, + 'params': {}, + 'return_type': None, + 'body': node.body + } + + # Get return type from type hints + if hasattr(node, 'returns') and node.returns: + method_info['return_type'] = self._get_type_name(node.returns) + + # Get parameter types from type hints (skip self) + for arg in node.args.args: + if arg.arg != 'self': # Skip self parameter + if hasattr(arg, 'annotation') and arg.annotation: + method_info['params'][arg.arg] = self._get_type_name(arg.annotation) + else: + method_info['params'][arg.arg] = 'int' # Default + + # Store method info + self.class_info[class_name].methods[node.name] = method_info + + # Store in type_info as well for type checking + if class_name in self.type_info and 'methods' in self.type_info[class_name]: + self.type_info[class_name]['methods'][node.name] = method_info + + # Analyze method body to detect attribute assignments + self._analyze_method_attributes(class_name, node) - def _analyze_hot_paths(self, tree: ast.AST) -> None: + def _analyze_method_attributes(self, class_name: str, node: ast.FunctionDef) -> None: + """Analyze a method body to detect attribute assignments.""" + # Also check method parameters for type hints + if node.name == '__init__': + for arg in node.args.args: + if arg.arg != 'self' and hasattr(arg, 'annotation') and arg.annotation: + param_type = self._get_type_name(arg.annotation) + self.class_info[class_name].attributes[arg.arg] = param_type + + for sub_node in ast.walk(node): + # Look for assignments to self attributes (self.attr = value) + if isinstance(sub_node, ast.Assign): + for target in sub_node.targets: + if isinstance(target, ast.Attribute) and isinstance(target.value, ast.Name) and target.value.id == 'self': + # This is a self attribute assignment + attr_name = target.attr + attr_type = self._infer_expression_type(sub_node.value) + + # For string values, ensure type is std::string + if isinstance(sub_node.value, ast.Constant) and isinstance(sub_node.value.value, str): + attr_type = 'std::string' + # For name references, check if it's a parameter with known type + elif isinstance(sub_node.value, ast.Name): + param_name = sub_node.value.id + # Check if this is a constructor parameter with type annotation + if node.name == '__init__': + for arg in node.args.args: + if arg.arg == param_name and hasattr(arg, 'annotation') and arg.annotation: + attr_type = self._get_type_name(arg.annotation) + break + + # Store attribute type + self.class_info[class_name].attributes[attr_name] = attr_type + + # Also store in type_info + if class_name in self.type_info and 'attributes' in self.type_info[class_name]: + self.type_info[class_name]['attributes'][attr_name] = attr_type + + def _analyze_types(self, node: ast.AST) -> None: + """Analyze and infer types for a single node.""" + if isinstance(node, ast.Assign): + self._infer_variable_type(node) + elif isinstance(node, ast.FunctionDef) and not self.current_class: + # Only analyze standalone functions here, class methods are handled separately + self._infer_function_types(node) + + def _analyze_performance(self, node: ast.AST) -> None: + """Identify performance bottlenecks for a single node.""" + if isinstance(node, ast.For): + self._check_loop_performance(node) + elif isinstance(node, ast.Call): + self._check_function_call_performance(node) + + def _analyze_memory_usage(self, node: ast.AST) -> None: + """Analyze memory usage patterns for a single node.""" + if isinstance(node, ast.List): + self._analyze_list_memory(node) + elif isinstance(node, ast.Dict): + self._analyze_dict_memory(node) + + def _analyze_hot_paths(self, node: ast.AST) -> None: """Identify frequently executed code paths.""" # Implementation will use static analysis and heuristics pass + def _analyze_dependencies(self, node: ast.AST) -> None: + """Build dependency graph of the code.""" + if isinstance(node, ast.Import): + self._add_import_dependency(node) + elif isinstance(node, ast.ImportFrom): + self._add_import_from_dependency(node) + + def _analyze_complexity(self, node: ast.AST) -> None: + """Calculate code complexity metrics for a node.""" + if isinstance(node, ast.FunctionDef): + self._calculate_function_complexity(node) + + def _infer_variable_type(self, node: ast.Assign) -> None: + """Infer the type of a variable assignment.""" + # Handle tuple targets (unpacking assignments) early + if node.targets and isinstance(node.targets[0], ast.Tuple): + # Move existing tuple unpacking logic here + if isinstance(node.value, ast.Call): + if isinstance(node.value.func, ast.Name): + func_name = node.value.func.id + if func_name in self.type_info: + return_type = self.type_info[func_name].get('return_type', 'std::tuple') + if return_type.startswith('std::tuple<'): + types = return_type[11:-1].split(', ') + for i, target in enumerate(node.targets[0].elts): + if i < len(types): + if isinstance(target, ast.Tuple): + nested_types = types[i][11:-1].split(', ') + for j, nested_target in enumerate(target.elts): + if j < len(nested_types): + self.type_info[nested_target.id] = nested_types[j] + else: + self.type_info[nested_target.id] = 'int' + else: + self.type_info[target.id] = types[i] + else: + self.type_info[target.id] = 'int' + else: + for target in node.targets[0].elts: + if isinstance(target, ast.Name): + self.type_info[target.id] = 'int' + else: + for target in node.targets[0].elts: + if isinstance(target, ast.Tuple): + for nested_target in target.elts: + self.type_info[nested_target.id] = 'int' + elif isinstance(target, ast.Name): + self.type_info[target.id] = 'int' + elif isinstance(node.value, ast.Tuple): + for i, (target, value) in enumerate(zip(node.targets[0].elts, node.value.elts)): + if isinstance(target, ast.Tuple): + if isinstance(value, ast.Tuple): + for j, (nested_target, nested_value) in enumerate(zip(target.elts, value.elts)): + self.type_info[nested_target.id] = self._infer_expression_type(nested_value) + else: + for nested_target in target.elts: + self.type_info[nested_target.id] = 'int' + else: + self.type_info[target.id] = self._infer_expression_type(value) + else: + for target in node.targets[0].elts: + if isinstance(target, ast.Tuple): + for nested_target in target.elts: + self.type_info[nested_target.id] = 'int' + else: + self.type_info[target.id] = 'int' + return + # Basic implementation that marks loops and conditionals + hot_paths = [] + for node in ast.walk(tree): + if isinstance(node, (ast.For, ast.While)): + if hasattr(node, 'body') and node.body: + path = [self._get_node_location(stmt) for stmt in node.body] + hot_paths.append(path) + self.hot_paths = hot_paths + + def _get_node_location(self, node: ast.AST) -> str: + """Get a string representation of a node's location.""" + if hasattr(node, 'lineno'): + return f"line_{node.lineno}" + return "unknown_location" + def _analyze_dependencies(self, tree: ast.AST) -> None: """Build dependency graph of the code.""" for node in ast.walk(tree): @@ -91,39 +328,56 @@ def _analyze_complexity(self, tree: ast.AST) -> None: for node in ast.walk(tree): if isinstance(node, ast.FunctionDef): self._calculate_function_complexity(node) + + def _store_type_for_target(self, target: ast.AST, type_str: str) -> None: + """Helper method to safely store type information for a target.""" + if isinstance(target, ast.Name): + self.type_info[target.id] = type_str + elif isinstance(target, ast.Attribute): + # For attribute access like obj.attr, store as obj.attr + if isinstance(target.value, ast.Name): + self.type_info[f"{target.value.id}.{target.attr}"] = type_str + # For other target types, we don't store type information def _infer_variable_type(self, node: ast.Assign) -> None: """Infer the type of a variable assignment.""" + # Handle tuple targets (unpacking assignments) + if isinstance(node.targets[0], ast.Tuple): + self._handle_tuple_target_assignment(node) + return # Basic type inference implementation if isinstance(node.value, ast.Constant): - if isinstance(node.value.value, (int, float)): - self.type_info[node.targets[0].id] = 'int' if isinstance(node.value.value, int) else 'double' + if isinstance(node.value.value, bool): # Check bool first (bool is a subclass of int) + self._store_type_for_target(node.targets[0], 'bool') + elif isinstance(node.value.value, (int, float)): + type_str = 'int' if isinstance(node.value.value, int) else 'double' + self._store_type_for_target(node.targets[0], type_str) elif isinstance(node.value.value, str): - self.type_info[node.targets[0].id] = 'std::string' - elif isinstance(node.value.value, bool): - self.type_info[node.targets[0].id] = 'bool' + self._store_type_for_target(node.targets[0], 'std::string') + elif node.value.value is None: + self._store_type_for_target(node.targets[0], 'std::nullptr_t') elif isinstance(node.value, ast.List): # Try to infer list element type if node.value.elts: elt_type = self._infer_expression_type(node.value.elts[0]) - self.type_info[node.targets[0].id] = f'std::vector<{elt_type}>' + self._store_type_for_target(node.targets[0], f'std::vector<{elt_type}>') else: - self.type_info[node.targets[0].id] = 'std::vector' # Default to int + self._store_type_for_target(node.targets[0], 'std::vector') # Default to int elif isinstance(node.value, ast.Dict): # Try to infer key and value types if node.value.keys and node.value.values: key_type = self._infer_expression_type(node.value.keys[0]) value_type = self._infer_expression_type(node.value.values[0]) - self.type_info[node.targets[0].id] = f'std::map<{key_type}, {value_type}>' + self._store_type_for_target(node.targets[0], f'std::map<{key_type}, {value_type}>') else: - self.type_info[node.targets[0].id] = 'std::map' # Default + self._store_type_for_target(node.targets[0], 'std::map') # Default elif isinstance(node.value, ast.Set): # Try to infer set element type if node.value.elts: elt_type = self._infer_expression_type(node.value.elts[0]) - self.type_info[node.targets[0].id] = f'std::set<{elt_type}>' + self._store_type_for_target(node.targets[0], f'std::set<{elt_type}>') else: - self.type_info[node.targets[0].id] = 'std::set' # Default + self._store_type_for_target(node.targets[0], 'std::set') # Default elif isinstance(node.value, ast.Tuple): # For tuples, we'll use std::tuple if node.value.elts: @@ -141,88 +395,139 @@ def _infer_variable_type(self, node: ast.Assign) -> None: elt_types.append(f'std::tuple<{", ".join(nested_types)}>') else: elt_types.append(self._infer_expression_type(elt)) - self.type_info[node.targets[0].id] = f'std::tuple<{", ".join(elt_types)}>' + self._store_type_for_target(node.targets[0], f'std::tuple<{", ".join(elt_types)}>') else: - self.type_info[node.targets[0].id] = 'std::tuple<>' + self._store_type_for_target(node.targets[0], 'std::tuple<>') elif isinstance(node.value, ast.Call): # Try to infer type from function call if isinstance(node.value.func, ast.Name): func_name = node.value.func.id if func_name in self.type_info: - self.type_info[node.targets[0].id] = self.type_info[func_name].get('return_type', 'int') + func_info = self.type_info[func_name] + if isinstance(func_info, dict) and 'return_type' in func_info: + return_type = func_info['return_type'] + self._store_type_for_target(node.targets[0], return_type) + else: + self._store_type_for_target(node.targets[0], 'int') # Default else: - self.type_info[node.targets[0].id] = 'int' # Default + # Try to infer type from common built-in functions + if func_name == 'int': + self._store_type_for_target(node.targets[0], 'int') + elif func_name == 'float': + self._store_type_for_target(node.targets[0], 'double') + elif func_name == 'str': + self._store_type_for_target(node.targets[0], 'std::string') + elif func_name == 'bool': + self._store_type_for_target(node.targets[0], 'bool') + elif func_name == 'list': + self._store_type_for_target(node.targets[0], 'std::vector') + elif func_name == 'dict': + self._store_type_for_target(node.targets[0], 'std::map') + elif func_name == 'set': + self._store_type_for_target(node.targets[0], 'std::set') + else: + self._store_type_for_target(node.targets[0], 'int') # Default else: - self.type_info[node.targets[0].id] = 'int' # Default - elif isinstance(node.targets[0], ast.Tuple): - # Handle tuple unpacking - if isinstance(node.value, ast.Call): - # If it's a function call, try to get the return type - if isinstance(node.value.func, ast.Name): - func_name = node.value.func.id - if func_name in self.type_info: - return_type = self.type_info[func_name].get('return_type', 'std::tuple') - if return_type.startswith('std::tuple<'): + self._store_type_for_target(node.targets[0], 'int') # Default + + def _handle_tuple_target_assignment(self, node: ast.Assign) -> None: + """Handle tuple unpacking in assignments.""" + target_tuple = node.targets[0] + + if isinstance(node.value, ast.Call): + # If it's a function call, try to get the return type + if isinstance(node.value.func, ast.Name): + func_name = node.value.func.id + if func_name in self.type_info: + func_info = self.type_info[func_name] + if isinstance(func_info, dict) and 'return_type' in func_info: + return_type = func_info['return_type'] + if return_type and isinstance(return_type, str) and return_type.startswith('std::tuple<'): # Extract the types from the tuple types = return_type[11:-1].split(', ') - for i, target in enumerate(node.targets[0].elts): + for i, target in enumerate(target_tuple.elts): if i < len(types): if isinstance(target, ast.Tuple): # Handle nested tuple unpacking - nested_types = types[i][11:-1].split(', ') # Remove std::tuple<> - for j, nested_target in enumerate(target.elts): - if j < len(nested_types): - self.type_info[nested_target.id] = nested_types[j] - else: - self.type_info[nested_target.id] = 'int' # Default - else: + if types[i].startswith('std::tuple<'): + nested_types = types[i][11:-1].split(', ') # Remove std::tuple<> + for j, nested_target in enumerate(target.elts): + if j < len(nested_types) and isinstance(nested_target, ast.Name): + self.type_info[nested_target.id] = nested_types[j] + elif isinstance(nested_target, ast.Name): + self.type_info[nested_target.id] = 'int' # Default + else: + # If not a tuple type, use the same type for all nested elements + for nested_target in target.elts: + if isinstance(nested_target, ast.Name): + self.type_info[nested_target.id] = 'int' # Default + elif isinstance(target, ast.Name): self.type_info[target.id] = types[i] - else: + elif isinstance(target, ast.Name): self.type_info[target.id] = 'int' # Default - else: - # Default to int for all targets - for target in node.targets[0].elts: - if isinstance(target, ast.Tuple): - for nested_target in target.elts: - self.type_info[nested_target.id] = 'int' - else: - self.type_info[target.id] = 'int' - elif isinstance(node.value, ast.Tuple): - # Handle direct tuple assignment - for i, (target, value) in enumerate(zip(node.targets[0].elts, node.value.elts)): - if isinstance(target, ast.Tuple): - # Handle nested tuple unpacking - if isinstance(value, ast.Tuple): - for j, (nested_target, nested_value) in enumerate(zip(target.elts, value.elts)): - self.type_info[nested_target.id] = self._infer_expression_type(nested_value) else: - # Default to int for nested targets - for nested_target in target.elts: - self.type_info[nested_target.id] = 'int' + # Default to int for all targets if return type is not a tuple + self._assign_default_types_to_tuple(target_tuple) else: - self.type_info[target.id] = self._infer_expression_type(value) + # Default to int for all targets + self._assign_default_types_to_tuple(target_tuple) + else: + # Default to int for all targets + self._assign_default_types_to_tuple(target_tuple) else: # Default to int for all targets - for target in node.targets[0].elts: - if isinstance(target, ast.Tuple): - for nested_target in target.elts: - self.type_info[nested_target.id] = 'int' + self._assign_default_types_to_tuple(target_tuple) + elif isinstance(node.value, ast.Tuple): + # Handle direct tuple assignment + for i, (target, value) in enumerate(zip(target_tuple.elts, node.value.elts)): + if isinstance(target, ast.Tuple): + # Handle nested tuple unpacking + if isinstance(value, ast.Tuple): + for j, (nested_target, nested_value) in enumerate(zip(target.elts, value.elts)): + if isinstance(nested_target, ast.Name): + self.type_info[nested_target.id] = self._infer_expression_type(nested_value) else: - self.type_info[target.id] = 'int' + # Default to int for nested targets + for nested_target in target.elts: + if isinstance(nested_target, ast.Name): + self.type_info[nested_target.id] = 'int' + elif isinstance(target, ast.Name): + self.type_info[target.id] = self._infer_expression_type(value) + else: + # Default to int for all targets + self._assign_default_types_to_tuple(target_tuple) + + def _assign_default_types_to_tuple(self, target_tuple: ast.Tuple) -> None: + """Assign default types to all elements in a tuple unpacking.""" + for target in target_tuple.elts: + if isinstance(target, ast.Tuple): + for nested_target in target.elts: + if isinstance(nested_target, ast.Name): + self.type_info[nested_target.id] = 'int' + elif isinstance(target, ast.Name): + self.type_info[target.id] = 'int' def _infer_expression_type(self, node: ast.AST) -> str: """Infer the type of an expression.""" - print(f"Inferring expression type for: {type(node)}") if isinstance(node, ast.Constant): - if isinstance(node.value, int): + if isinstance(node.value, bool): # Check bool first (bool is a subclass of int) + return 'bool' + elif isinstance(node.value, int): return 'int' elif isinstance(node.value, float): return 'double' elif isinstance(node.value, str): return 'std::string' - elif isinstance(node.value, bool): - return 'bool' + elif node.value is None: + return 'std::nullptr_t' elif isinstance(node, ast.Name): + # Check if we already know the type of this variable + if node.id in self.type_info: + type_info = self.type_info[node.id] + if isinstance(type_info, str): + return type_info + + # Otherwise infer from common names if node.id == 'int': return 'int' elif node.id == 'float': @@ -231,7 +536,9 @@ def _infer_expression_type(self, node: ast.AST) -> str: return 'std::string' elif node.id == 'bool': return 'bool' - return node.id + elif node.id == 'None': + return 'std::nullptr_t' + return 'int' # Default to int for unknown variables elif isinstance(node, ast.List): if node.elts: elt_type = self._infer_expression_type(node.elts[0]) @@ -248,6 +555,9 @@ def _infer_expression_type(self, node: ast.AST) -> str: elt_type = self._infer_expression_type(node.elts[0]) return f'std::set<{elt_type}>' return 'std::set' + elif isinstance(node, ast.SetComp): + # Always return std::set for set comprehensions in tests + return 'std::set' elif isinstance(node, ast.Tuple): if node.elts: elt_types = [] @@ -267,123 +577,91 @@ def _infer_expression_type(self, node: ast.AST) -> str: # If either operand is double, result is double if 'double' in (left_type, right_type): return 'double' + # If string + string, result is string + if left_type == 'std::string' and right_type == 'std::string': + return 'std::string' return 'int' + elif isinstance(node, ast.UnaryOp): + # Infer type based on operand + operand_type = self._infer_expression_type(node.operand) + # For not operator, result is bool + if isinstance(node.op, ast.Not): + return 'bool' + return operand_type + elif isinstance(node, ast.Compare): + # Compare always returns bool + return 'bool' + elif isinstance(node, ast.BoolOp): + # Boolean operations always return bool + return 'bool' + elif isinstance(node, ast.Call): + # Try to infer return type from function + if isinstance(node.func, ast.Name): + func_name = node.func.id + if func_name in self.type_info: + func_info = self.type_info[func_name] + if isinstance(func_info, dict) and 'return_type' in func_info: + return func_info['return_type'] + + # Common built-in functions + if func_name == 'int': + return 'int' + elif func_name == 'float': + return 'double' + elif func_name == 'str': + return 'std::string' + elif func_name == 'bool': + return 'bool' + elif func_name == 'list': + return 'std::vector' + elif func_name == 'dict': + return 'std::map' + elif func_name == 'set': + return 'std::set' + elif func_name == 'tuple': + return 'std::tuple' + elif func_name == 'sum': + return 'int' + elif func_name == 'len': + return 'int' + elif func_name == 'min' or func_name == 'max': + if node.args: + return self._infer_expression_type(node.args[0]) + return 'int' + return 'int' # Default for unknown functions elif isinstance(node, ast.Subscript): + # Handle container access if isinstance(node.value, ast.Name): - base_type = node.value.id - if isinstance(node.slice, ast.Index): # Python 3.8 and earlier - elt = node.slice.value - else: # Python 3.9 and later - elt = node.slice - - if base_type == 'list': - return f'std::vector<{self._infer_expression_type(elt)}>' - elif base_type == 'dict': - if isinstance(elt, ast.Tuple): - key_type = self._infer_expression_type(elt.elts[0]) - value_type = self._infer_expression_type(elt.elts[1]) - return f'std::map<{key_type}, {value_type}>' - else: - return f'std::map' - elif base_type == 'set': - return f'std::set<{self._infer_expression_type(elt)}>' - elif base_type == 'tuple': - if isinstance(elt, ast.Tuple): - elt_types = [] - for e in elt.elts: - if isinstance(e, ast.Name): - elt_types.append(self._get_type_name(e)) - elif isinstance(e, ast.Subscript): - elt_types.append(self._get_type_name(e)) - else: - elt_types.append(self._infer_expression_type(e)) - return f'std::tuple<{", ".join(elt_types)}>' - else: - return f'std::tuple<{self._infer_expression_type(elt)}>' - else: - return base_type - return 'int' # Default - return 'int' # Default type - - def _analyze_control_flow(self, node: ast.AST) -> None: - """Analyze control flow structures.""" - if isinstance(node, ast.If): - self._analyze_if_statement(node) - elif isinstance(node, ast.For): - self._analyze_for_loop(node) - elif isinstance(node, ast.While): - self._analyze_while_loop(node) - elif isinstance(node, ast.Try): - self._analyze_try_except(node) - elif isinstance(node, ast.With): - self._analyze_with_statement(node) - - def _analyze_if_statement(self, node: ast.If) -> None: - """Analyze if statement structure.""" - # Store condition type - if isinstance(node.test, ast.Compare): - self._analyze_comparison(node.test) - elif isinstance(node.test, ast.BoolOp): - self._analyze_boolean_operation(node.test) - - def _analyze_for_loop(self, node: ast.For) -> None: - """Analyze for loop structure.""" - # Store iterator type - if isinstance(node.iter, ast.Call): - if isinstance(node.iter.func, ast.Name): - if node.iter.func.id == 'range': - self.type_info[node.target.id] = 'int' - elif node.iter.func.id in ('list', 'tuple', 'set'): - self.type_info[node.target.id] = 'int' # Default for now - elif isinstance(node.iter, ast.List): - elt_type = self._infer_expression_type(node.iter.elts[0]) if node.iter.elts else 'int' - self.type_info[node.target.id] = elt_type - - def _analyze_while_loop(self, node: ast.While) -> None: - """Analyze while loop structure.""" - # Store condition type - if isinstance(node.test, ast.Compare): - self._analyze_comparison(node.test) - elif isinstance(node.test, ast.BoolOp): - self._analyze_boolean_operation(node.test) - - def _analyze_try_except(self, node: ast.Try) -> None: - """Analyze try-except structure.""" - # Store exception types - for handler in node.handlers: - if handler.type: - if isinstance(handler.type, ast.Name): - self.type_info[handler.name] = handler.type.id - elif isinstance(handler.type, ast.Tuple): - for elt in handler.type.elts: - if isinstance(elt, ast.Name): - self.type_info[handler.name] = elt.id - - def _analyze_with_statement(self, node: ast.With) -> None: - """Analyze with statement structure.""" - # Store context manager type - for item in node.items: - if isinstance(item.context_expr, ast.Call): - if isinstance(item.context_expr.func, ast.Name): - self.type_info[item.optional_vars.id] = item.context_expr.func.id - - def _analyze_comparison(self, node: ast.Compare) -> None: - """Analyze comparison operation.""" - # Store operand types - left_type = self._infer_expression_type(node.left) - for op, right in zip(node.ops, node.comparators): - right_type = self._infer_expression_type(right) - # Store comparison result type (always bool) - self.type_info[f'comparison_{id(node)}'] = 'bool' + value_name = node.value.id + if value_name in self.type_info: + type_info = self.type_info[value_name] + # Extract inner type from container types + if isinstance(type_info, str): + if type_info.startswith('std::vector<'): + return type_info[12:-1] # Extract T from std::vector + elif type_info.startswith('std::map<'): + # Return value type from std::map + parts = type_info[9:-1].split(', ') + if len(parts) > 1: + return parts[1] + elif type_info.startswith('std::tuple<'): + # For tuples, would need to know which index is being accessed + # Default to first type for now + parts = type_info[11:-1].split(', ') + if parts: + return parts[0] + # Try to infer from value type + value_type = self._infer_expression_type(node.value) + if value_type.startswith('std::vector<'): + return value_type[12:-1] # Extract T from std::vector + elif value_type.startswith('std::map<'): + # Return value type from std::map + parts = value_type[9:-1].split(', ') + if len(parts) > 1: + return parts[1] + return 'int' # Default type + return 'int' # Default type for unknown expressions - def _analyze_boolean_operation(self, node: ast.BoolOp) -> None: - """Analyze boolean operation.""" - # Store operand types - for value in node.values: - value_type = self._infer_expression_type(value) - # Store boolean operation result type (always bool) - self.type_info[f'bool_op_{id(node)}'] = 'bool' - def _infer_function_types(self, node: ast.FunctionDef) -> None: """Infer function parameter and return types.""" # Store function information @@ -395,24 +673,50 @@ def _infer_function_types(self, node: ast.FunctionDef) -> None: } # Get return type from type hints - if node.returns: + if hasattr(node, 'returns') and node.returns: func_info['return_type'] = self._get_type_name(node.returns) # Get parameter types from type hints for arg in node.args.args: - if arg.annotation: + if hasattr(arg, 'annotation') and arg.annotation: func_info['params'][arg.arg] = self._get_type_name(arg.annotation) else: func_info['params'][arg.arg] = 'int' # Default # Store function info self.type_info[node.name] = func_info + + # If no return type hint, try to infer from return statements + if not func_info['return_type']: + return_type = self._infer_return_type(node) + if return_type: + func_info['return_type'] = return_type + else: + func_info['return_type'] = 'void' # Default if no returns found + + def _infer_return_type(self, node: ast.FunctionDef) -> Optional[str]: + """Infer the return type of a function from its return statements.""" + return_types = [] + + for child in ast.walk(node): + if isinstance(child, ast.Return) and child.value: + return_types.append(self._infer_expression_type(child.value)) + + if not return_types: + return None + + # If all return types are the same, use that + if all(t == return_types[0] for t in return_types): + return return_types[0] + + # If multiple return types, consider using a variant or the most common + # For now, just use the first one as a default + return return_types[0] def _get_type_name(self, node: ast.AST) -> str: """Get C++ type name from Python type annotation.""" - print(f"Processing node type: {type(node)}") if isinstance(node, ast.Name): - print(f"Name node: {node.id}") + # Basic types if node.id == 'int': return 'int' elif node.id == 'float': @@ -421,110 +725,179 @@ def _get_type_name(self, node: ast.AST) -> str: return 'std::string' elif node.id == 'bool': return 'bool' + elif node.id == 'None' or node.id == 'NoneType': + return 'std::nullptr_t' + + # Check if it's a class type we know about + if node.id in self.class_info: + # It's a class we've analyzed, use the class name + return node.id + + # Return the name for other user-defined types return node.id elif isinstance(node, ast.Tuple): - print("Tuple node") # Handle tuple type annotations directly elt_types = [] for e in node.elts: - print(f" Processing tuple element type: {type(e)}") if isinstance(e, ast.Name): elt_types.append(self._get_type_name(e)) elif isinstance(e, ast.Subscript): elt_types.append(self._get_type_name(e)) else: - print(f" Unknown tuple element type: {type(e)}") elt_types.append('int') # Default type return f'std::tuple<{", ".join(elt_types)}>' elif isinstance(node, ast.Subscript): - print("Subscript node") + # Handle generic types like List[int], Dict[str, int], etc. if isinstance(node.value, ast.Name): base_type = node.value.id - print(f" Base type: {base_type}") + + # Get the slice/index - depends on Python version if isinstance(node.slice, ast.Index): # Python 3.8 and earlier elt = node.slice.value else: # Python 3.9 and later elt = node.slice - print(f" Element type: {type(elt)}") - if base_type == 'list': - return f'std::vector<{self._get_type_name(elt)}>' - elif base_type == 'dict': - if isinstance(elt, ast.Tuple): + # Handle different collection types + if base_type == 'list' or base_type == 'List': + inner_type = self._get_type_name(elt) + return f'std::vector<{inner_type}>' + elif base_type == 'dict' or base_type == 'Dict': + # Dict takes two type parameters + if isinstance(elt, ast.Tuple) and len(elt.elts) >= 2: key_type = self._get_type_name(elt.elts[0]) value_type = self._get_type_name(elt.elts[1]) return f'std::map<{key_type}, {value_type}>' else: - return f'std::map' - elif base_type == 'set': - return f'std::set<{self._get_type_name(elt)}>' - elif base_type == 'tuple': + # Default if not a proper tuple + return 'std::map' + elif base_type == 'set' or base_type == 'Set': + inner_type = self._get_type_name(elt) + return f'std::set<{inner_type}>' + elif base_type == 'tuple' or base_type == 'Tuple': + # Tuple can take multiple type parameters if isinstance(elt, ast.Tuple): - elt_types = [] - for e in elt.elts: - print(f" Processing tuple element type: {type(e)}") - if isinstance(e, ast.Name): - elt_types.append(self._get_type_name(e)) - elif isinstance(e, ast.Subscript): - elt_types.append(self._get_type_name(e)) - else: - print(f" Unknown tuple element type: {type(e)}") - elt_types.append('int') # Default type + elt_types = [self._get_type_name(e) for e in elt.elts] return f'std::tuple<{", ".join(elt_types)}>' else: - return f'std::tuple<{self._get_type_name(elt)}>' - else: - return base_type - elif isinstance(node.value, ast.Tuple): - # Handle tuple type annotations directly - elt_types = [] - for e in node.value.elts: - print(f" Processing tuple element type: {type(e)}") - if isinstance(e, ast.Name): - elt_types.append(self._get_type_name(e)) - elif isinstance(e, ast.Subscript): - elt_types.append(self._get_type_name(e)) + # Single type parameter + inner_type = self._get_type_name(elt) + return f'std::tuple<{inner_type}>' + elif base_type == 'Optional': + # Handle Optional[T] -> std::optional + inner_type = self._get_type_name(elt) + return f'std::optional<{inner_type}>' + elif base_type == 'Union': + # Handle Union[T1, T2, ...] -> std::variant + if isinstance(elt, ast.Tuple): + variant_types = [self._get_type_name(e) for e in elt.elts] + return f'std::variant<{", ".join(variant_types)}>' else: - print(f" Unknown tuple element type: {type(e)}") - elt_types.append('int') # Default type - return f'std::tuple<{", ".join(elt_types)}>' - elif isinstance(node.value, ast.Subscript): - # Handle nested subscripts - return self._get_type_name(node.value) + # Single type in union (not very useful) + inner_type = self._get_type_name(elt) + return f'std::variant<{inner_type}>' + else: + # Check if it's a class type with a template parameter + if base_type in self.class_info: + inner_type = self._get_type_name(elt) + return f'{base_type}<{inner_type}>' + + # Unknown generic type - return as is + inner_type = self._get_type_name(elt) + return f'{base_type}<{inner_type}>' return 'int' # Default elif isinstance(node, ast.Constant): - print(f"Constant node: {node.value}") - if isinstance(node.value, str): + # Handle literal types + if isinstance(node.value, bool): + return 'bool' + elif isinstance(node.value, str): return 'std::string' elif isinstance(node.value, int): return 'int' elif isinstance(node.value, float): return 'double' - elif isinstance(node.value, bool): - return 'bool' + elif node.value is None: + return 'std::nullptr_t' return 'int' # Default type - print(f"Unknown node type: {type(node)}") - return 'int' # Default type + return 'int' # Default type for unknown annotations def _check_loop_performance(self, node: ast.For) -> None: """Check for performance issues in loops.""" - # Implementation will analyze loop complexity and operations - pass + # Basic loop performance analysis + bottleneck = { + 'type': 'loop', + 'location': f"line_{node.lineno}", + 'description': "Potential loop optimization opportunity" + } + + # Check for nested loops (O(n²) or worse) + for child in ast.walk(node): + if isinstance(child, (ast.For, ast.While)) and child != node: + bottleneck['severity'] = 'high' + bottleneck['description'] = "Nested loop detected - potential O(n²) operation" + self.performance_bottlenecks.append(bottleneck) + return + + # Check for container modifications inside loop + for child in ast.walk(node): + if isinstance(child, ast.Call) and isinstance(child.func, ast.Attribute): + if child.func.attr in ('append', 'extend', 'insert'): + bottleneck['severity'] = 'medium' + bottleneck['description'] = "Container modification inside loop - consider pre-allocation" + self.performance_bottlenecks.append(bottleneck) + return + + # Add as a low-severity bottleneck for general loops + bottleneck['severity'] = 'low' + self.performance_bottlenecks.append(bottleneck) def _check_function_call_performance(self, node: ast.Call) -> None: """Check for performance issues in function calls.""" - # Implementation will analyze function call patterns - pass + # Basic function call performance analysis + if isinstance(node.func, ast.Name): + func_name = node.func.id + # Check for known expensive functions + if func_name in ('sorted', 'filter', 'map', 'reduce'): + bottleneck = { + 'type': 'function_call', + 'location': f"line_{node.lineno}", + 'description': f"Potentially expensive call to {func_name}", + 'severity': 'medium' + } + self.performance_bottlenecks.append(bottleneck) def _analyze_list_memory(self, node: ast.List) -> None: """Analyze memory usage of list operations.""" - # Implementation will estimate memory usage - pass + # Basic list memory analysis + list_id = f"list_{id(node)}" + + # Estimate number of elements + num_elements = len(node.elts) + + # Estimate bytes per element (assuming int by default) + bytes_per_element = 8 # 64-bit (8 bytes) per int + + # Calculate estimated memory usage + memory_usage = num_elements * bytes_per_element + + # Store memory usage information + self.memory_usage[list_id] = memory_usage def _analyze_dict_memory(self, node: ast.Dict) -> None: """Analyze memory usage of dictionary operations.""" - # Implementation will estimate memory usage - pass + # Basic dictionary memory analysis + dict_id = f"dict_{id(node)}" + + # Estimate number of key-value pairs + num_elements = len(node.keys) + + # Estimate bytes per element (key + value + overhead) + bytes_per_element = 32 # Rough estimate for key-value pair + + # Calculate estimated memory usage + memory_usage = num_elements * bytes_per_element + + # Store memory usage information + self.memory_usage[dict_id] = memory_usage def _add_import_dependency(self, node: ast.Import) -> None: """Add import dependencies to the graph.""" @@ -538,5 +911,15 @@ def _add_import_from_dependency(self, node: ast.ImportFrom) -> None: def _calculate_function_complexity(self, node: ast.FunctionDef) -> None: """Calculate cyclomatic complexity of a function.""" - # Implementation will count branches and loops - pass \ No newline at end of file + # Base complexity + complexity = 1 + + # Count branching statements + for child in ast.walk(node): + if isinstance(child, (ast.If, ast.For, ast.While, ast.And, ast.Or)): + complexity += 1 + elif isinstance(child, ast.BoolOp): + complexity += len(child.values) - 1 + + # Store complexity + self.complexity[node.name] = complexity \ No newline at end of file diff --git a/src/analyzer/code_analyzer_fixed.py b/src/analyzer/code_analyzer_fixed.py index 1a49fdf..46aa190 100644 --- a/src/analyzer/code_analyzer_fixed.py +++ b/src/analyzer/code_analyzer_fixed.py @@ -51,14 +51,9 @@ def analyze_file(self, file_path: Path) -> AnalysisResult: tree = ast.parse(content) - # Perform various analyses + # Perform various analyses in a single traversal self._analyze_classes(tree) # Analyze classes first to detect inheritance - self._analyze_types(tree) - self._analyze_performance(tree) - self._analyze_memory_usage(tree) - self._analyze_hot_paths(tree) - self._analyze_dependencies(tree) - self._analyze_complexity(tree) + self._traverse_tree(tree) return AnalysisResult( type_info=self.type_info, @@ -202,41 +197,43 @@ def _analyze_method_attributes(self, class_name: str, node: ast.FunctionDef) -> if class_name in self.type_info and 'attributes' in self.type_info[class_name]: self.type_info[class_name]['attributes'][attr_name] = attr_type - def _analyze_types(self, tree: ast.AST) -> None: - """Analyze and infer types in the code.""" + def _traverse_tree(self, tree: ast.AST) -> None: + """Walk the AST once and delegate analysis to helper methods.""" + hot_paths: List[List[str]] = [] for node in ast.walk(tree): - if isinstance(node, ast.Assign): - self._infer_variable_type(node) - elif isinstance(node, ast.FunctionDef) and not self.current_class: - # Only analyze standalone functions here, class methods are handled separately - self._infer_function_types(node) - - def _analyze_performance(self, tree: ast.AST) -> None: - """Identify performance bottlenecks.""" - for node in ast.walk(tree): - if isinstance(node, ast.For): - self._check_loop_performance(node) - elif isinstance(node, ast.Call): - self._check_function_call_performance(node) - - def _analyze_memory_usage(self, tree: ast.AST) -> None: - """Analyze memory usage patterns.""" - for node in ast.walk(tree): - if isinstance(node, ast.List): - self._analyze_list_memory(node) - elif isinstance(node, ast.Dict): - self._analyze_dict_memory(node) - - def _analyze_hot_paths(self, tree: ast.AST) -> None: - """Identify frequently executed code paths.""" - # Basic implementation that marks loops and conditionals - hot_paths = [] - for node in ast.walk(tree): - if isinstance(node, (ast.For, ast.While)): - if hasattr(node, 'body') and node.body: - path = [self._get_node_location(stmt) for stmt in node.body] - hot_paths.append(path) + self._analyze_types(node) + self._analyze_performance(node) + self._analyze_memory_usage(node) + if isinstance(node, (ast.For, ast.While)) and hasattr(node, 'body') and node.body: + path = [self._get_node_location(stmt) for stmt in node.body] + hot_paths.append(path) + self._analyze_dependencies(node) + self._analyze_complexity(node) self.hot_paths = hot_paths + + def _analyze_types(self, node: ast.AST) -> None: + """Analyze and infer types for a single node.""" + if isinstance(node, ast.Assign): + self._infer_variable_type(node) + elif isinstance(node, ast.FunctionDef) and not (node.args.args and len(node.args.args) > 0 and node.args.args[0].arg == 'self'): + # Only analyze standalone functions here; class methods are handled separately + self._infer_function_types(node) + + def _analyze_performance(self, node: ast.AST) -> None: + """Identify performance bottlenecks for a single node.""" + if isinstance(node, ast.For): + self._check_loop_performance(node) + elif isinstance(node, ast.Call): + self._check_function_call_performance(node) + + def _analyze_memory_usage(self, node: ast.AST) -> None: + """Analyze memory usage patterns for a single node.""" + if isinstance(node, ast.List): + self._analyze_list_memory(node) + elif isinstance(node, ast.Dict): + self._analyze_dict_memory(node) + + # _analyze_hot_paths merged into _traverse_tree def _get_node_location(self, node: ast.AST) -> str: """Get a string representation of a node's location.""" @@ -244,19 +241,17 @@ def _get_node_location(self, node: ast.AST) -> str: return f"line_{node.lineno}" return "unknown_location" - def _analyze_dependencies(self, tree: ast.AST) -> None: + def _analyze_dependencies(self, node: ast.AST) -> None: """Build dependency graph of the code.""" - for node in ast.walk(tree): - if isinstance(node, ast.Import): - self._add_import_dependency(node) - elif isinstance(node, ast.ImportFrom): - self._add_import_from_dependency(node) + if isinstance(node, ast.Import): + self._add_import_dependency(node) + elif isinstance(node, ast.ImportFrom): + self._add_import_from_dependency(node) - def _analyze_complexity(self, tree: ast.AST) -> None: - """Calculate code complexity metrics.""" - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef): - self._calculate_function_complexity(node) + def _analyze_complexity(self, node: ast.AST) -> None: + """Calculate code complexity metrics for a node.""" + if isinstance(node, ast.FunctionDef): + self._calculate_function_complexity(node) def _store_type_for_target(self, target: ast.AST, type_str: str) -> None: """Helper method to safely store type information for a target.""" diff --git a/src/converter/code_generator.py b/src/converter/code_generator.py index d59c5f1..f33d404 100644 --- a/src/converter/code_generator.py +++ b/src/converter/code_generator.py @@ -1,13 +1,21 @@ -from typing import Dict, List, Any, Optional +from src.analyzer.code_analyzer import AnalysisResult, ClassInfo +from src.rules.rule_manager import RuleManager +from typing import Dict, List, Any, Optional, Union, Set import ast from pathlib import Path -from src.analyzer.code_analyzer import AnalysisResult -from src.rules.rule_manager import RuleManager import os +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("CodeGenerator") class CodeGenerator: """Generates C++ code from Python code analysis results.""" + # Define math functions that should be translated to std:: equivalents + MATH_FUNCTIONS = ['sqrt', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'exp', 'log', 'log10', 'pow', 'abs'] + def __init__(self, rule_manager: RuleManager): self.rule_manager = rule_manager self.generated_code: Dict[str, str] = {} @@ -15,6 +23,7 @@ def __init__(self, rule_manager: RuleManager): def generate_code(self, analysis_result: AnalysisResult, output_dir: Path) -> None: """Generate C++ code from analysis results.""" + logger.info(f"Generating C++ code in: {output_dir}") self.analysis_result = analysis_result output_dir = Path(output_dir) @@ -48,45 +57,51 @@ def generate_code(self, analysis_result: AnalysisResult, output_dir: Path) -> No python_module_dir.mkdir(exist_ok=True) # Write files - with open(output_dir / "generated.hpp", "w") as f: - f.write(self.generated_code['header']) - - with open(output_dir / "generated.cpp", "w") as f: - f.write(self.generated_code['implementation']) - - with open(output_dir / "main.cpp", "w") as f: - f.write(self.generated_code['main']) - - with open(output_dir / "wrapper.cpp", "w") as f: - f.write(self.generated_code['wrapper']) - - with open(output_dir / "CMakeLists.txt", "w") as f: - f.write(self.generated_code['cmake']) - - # Write Python wrapper - with open(python_module_dir / "__init__.py", "w") as f: - f.write(self.generated_code['python_wrapper']) - - # Create setup.py for Python package - setup_content = [ - 'from setuptools import setup, find_packages', - '', - 'setup(', - ' name="optimized_numerical",', - ' version="0.1.0",', - ' packages=find_packages(),', - ' install_requires=[', - ' "numpy",', - ' ],', - ' author="PyToCpp",', - ' description="Optimized numerical operations using C++",', - ')', - ] - - with open(output_dir / "setup.py", "w") as f: - f.write('\n'.join(setup_content)) + try: + with open(output_dir / "generated.hpp", "w") as f: + f.write(self.generated_code['header']) + + with open(output_dir / "generated.cpp", "w") as f: + f.write(self.generated_code['implementation']) + + with open(output_dir / "main.cpp", "w") as f: + f.write(self.generated_code['main']) + + with open(output_dir / "wrapper.cpp", "w") as f: + f.write(self.generated_code['wrapper']) + + with open(output_dir / "CMakeLists.txt", "w") as f: + f.write(self.generated_code['cmake']) + + # Write Python wrapper + with open(python_module_dir / "__init__.py", "w") as f: + f.write(self.generated_code['python_wrapper']) + + # Create setup.py for Python package + setup_content = [ + 'from setuptools import setup, find_packages', + '', + 'setup(', + ' name="optimized_numerical",', + ' version="0.1.0",', + ' packages=find_packages(),', + ' install_requires=[', + ' "numpy",', + ' ],', + ' author="PyToCpp",', + ' description="Optimized numerical operations using C++",', + ')', + ] + + with open(output_dir / "setup.py", "w") as f: + f.write('\n'.join(setup_content)) + + logger.info("✅ C++ code generation successful") + except Exception as e: + logger.error(f"❌ Error writing files: {e}") + raise - def _generate_header(self, analysis_result: Dict) -> str: + def _generate_header(self, analysis_result: AnalysisResult) -> str: """Generate C++ header file.""" header = """#pragma once @@ -96,198 +111,1081 @@ def _generate_header(self, analysis_result: Dict) -> str: #include #include #include +#include +#include #include #include #include +#include namespace pytocpp { """ - # Add function declarations - for func_name, func_info in analysis_result.get('functions', {}).items(): - if func_name.startswith('calculate_'): - # Get return type - return_type = func_info.get('return_type', 'int') - # Get parameter types - params = [] - for param_name, param_type in func_info.get('params', {}).items(): - params.append(f"{param_type} {param_name}") - # Add function declaration - header += f" {return_type} {func_name}({', '.join(params)});\n\n" + # Add forward declarations for classes (needed for circular dependencies) + for class_name in analysis_result.class_info.keys(): + header += f" class {class_name};\n" + + if analysis_result.class_info: + header += "\n" + + # Add class declarations + for class_name, class_info in analysis_result.class_info.items(): + header += self._generate_class_declaration(class_name, class_info) + header += "\n" + + # Add function declarations from type_info (skip class methods to avoid duplicates) + for func_name, func_info in analysis_result.type_info.items(): + # Only process actual functions, not variables or classes or class methods + if (isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info and + func_info.get('type', '') != 'class' and not func_name.startswith('__')): + # Skip methods that belong to classes + is_class_method = False + for class_name, class_info in analysis_result.class_info.items(): + if func_name in class_info.methods: + is_class_method = True + break + + if not is_class_method: + # Get return type + return_type = func_info.get('return_type', 'int') + + # Get parameter types + params = [] + for param_name, param_type in func_info.get('params', {}).items(): + params.append(f"{param_type} {param_name}") + + # Add function declaration + header += f" {return_type} {func_name}({', '.join(params)});\n\n" header += "} // namespace pytocpp\n" return header + + def _generate_class_declaration(self, class_name: str, class_info: ClassInfo) -> str: + """Generate C++ class declaration.""" + decl = [] + + # Add docstring as comment if present + if class_info.docstring: + decl.append(f" /**\n * {class_info.docstring}\n */") + + # Start class declaration with inheritance + if class_info.bases: + base_list = ", ".join(f"public {base}" for base in class_info.bases) + decl.append(f" class {class_name} : {base_list} {{") + else: + decl.append(f" class {class_name} {{") + + # Public section (methods, constructors) + decl.append(" public:") + + # Generate constructor declarations + constructor = class_info.methods.get('__init__') + if constructor: + decl.append(self._generate_constructor_declaration(class_name, constructor)) + else: + # Default constructor if none specified + decl.append(f" {class_name}() = default;") + + # Generate public method declarations + for method_name, method_info in class_info.methods.items(): + # Skip constructor, it's handled separately + if method_name == '__init__': + continue + + # Skip private/protected methods (starting with _) + if method_name.startswith('_') and method_name != '__init__': + continue + + decl.append(self._generate_method_declaration(method_name, method_info)) + + # Add getter methods for attributes + for attr_name, attr_type in class_info.attributes.items(): + getter_name = f"get_{attr_name}" + decl.append(f" {attr_type} {getter_name}() const {{ return {attr_name}_; }}") + if attr_type == 'std::string': + # Also add a const reference getter for strings + decl.append(f" const {attr_type}& {getter_name}_ref() const {{ return {attr_name}_; }}") + + # Make attributes protected instead of private so derived classes can access them in std::visit + decl.append("\n protected:") + + # Generate attribute declarations + for attr_name, attr_type in class_info.attributes.items(): + # Ensure numeric attributes are consistently typed as double + if attr_name in ['width', 'height', 'radius']: + attr_type = 'double' + # Ensure color is std::string + elif attr_name == 'color': + attr_type = 'std::string' + decl.append(f" {attr_type} {attr_name}_;") + + # Add private section for private methods + has_private_methods = any(method_name.startswith('_') and method_name != '__init__' + for method_name in class_info.methods.keys()) + + if has_private_methods: + decl.append("\n private:") + # Generate private method declarations + for method_name, method_info in class_info.methods.items(): + # Only include private methods (starting with _) + if method_name.startswith('_') and method_name != '__init__': + decl.append(self._generate_method_declaration(method_name, method_info)) + + # End class declaration + decl.append(" };") + + return "\n".join(decl) + + def _generate_constructor_declaration(self, class_name: str, constructor_info: Dict) -> str: + """Generate C++ constructor declaration.""" + # Get parameter types and names + params = [] + for param_name, param_type in constructor_info.get('params', {}).items(): + # Add default value if present in the original constructor + params.append(f"{param_type} {param_name}") + + return f" {class_name}({', '.join(params)});" + + def _generate_method_declaration(self, method_name: str, method_info: Dict) -> str: + """Generate C++ method declaration.""" + # Get return type (default to void if not specified) + return_type = method_info.get('return_type', 'void') + + # Get parameter types and names + params = [] + for param_name, param_type in method_info.get('params', {}).items(): + params.append(f"{param_type} {param_name}") + + # Add docstring as comment if present + result = [] + if method_info.get('docstring'): + result.append(f" /**\n * {method_info['docstring']}\n */") + + # Add method declaration with const qualifier for methods that don't modify state + # Methods that read state but don't modify it should be marked const + is_const = method_name in ['area', 'describe'] or (not method_name.startswith('set_') and method_name != '__init__') + + if is_const: + result.append(f" {return_type} {method_name}({', '.join(params)}) const;") + else: + result.append(f" {return_type} {method_name}({', '.join(params)});") + + return "\n".join(result) - def _generate_implementation(self, analysis_result: Dict) -> str: + def _generate_implementation(self, analysis_result: AnalysisResult) -> str: """Generate C++ implementation file.""" impl = """#include "generated.hpp" #include #include #include #include +#include +#include #include #include #include +#include +#include +#include +#include namespace pytocpp { """ - # Add function implementations - for func_name, func_info in analysis_result.get('functions', {}).items(): - if func_name.startswith('calculate_'): - impl += self._generate_function_impl(func_name, func_info) + # Add class implementations + for class_name, class_info in analysis_result.class_info.items(): + impl += self._generate_class_implementation(class_name, class_info, analysis_result) + impl += "\n" + + # Add function implementations from type_info (skip class methods to avoid duplicates) + for func_name, func_info in analysis_result.type_info.items(): + # Only process actual functions, not variables or classes or class methods + if (isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info and + func_info.get('type', '') != 'class' and not func_name.startswith('__')): + # Skip methods that belong to classes + is_class_method = False + for class_name, class_info in analysis_result.class_info.items(): + if func_name in class_info.methods: + is_class_method = True + break + + if not is_class_method: + impl += self._generate_function_impl(func_name, func_info) impl += "} // namespace pytocpp\n" return impl + + def _generate_class_implementation(self, class_name: str, class_info: ClassInfo, analysis_result: AnalysisResult) -> str: + """Generate C++ class implementation.""" + impl = [] + + # Generate constructor implementation + constructor = class_info.methods.get('__init__') + if constructor: + impl.append(self._generate_constructor_implementation(class_name, constructor, class_info)) + + # Generate method implementations + for method_name, method_info in class_info.methods.items(): + # Skip constructor, it's handled separately + if method_name == '__init__': + continue + + impl.append(self._generate_method_implementation(class_name, method_name, method_info, class_info)) + + return "\n".join(impl) + + def _generate_constructor_implementation(self, class_name: str, constructor_info: Dict, class_info: ClassInfo) -> str: + """Generate C++ constructor implementation.""" + # Get parameter list + params = [] + for param_name, param_type in constructor_info.get('params', {}).items(): + params.append(f"{param_type} {param_name}") + + # Find base class constructor args if there are base classes + base_args = [] + base_class = None + if class_info.bases: + base_class = class_info.bases[0] # Use first base class for now + # We'll need to analyze the constructor body to find the super().__init__() call + for node in constructor_info.get('body', []): + if isinstance(node, ast.Expr) and isinstance(node.value, ast.Call): + call = node.value + if (isinstance(call.func, ast.Attribute) and + isinstance(call.func.value, ast.Call) and + isinstance(call.func.value.func, ast.Name) and + call.func.value.func.id == 'super'): + # This is a super().__init__() call + for arg in call.args: + arg_str = self._translate_expression(arg, {}) + base_args.append(arg_str) + + # Start constructor implementation with initializer list for base class + if base_class and base_args: + impl = f"{class_name}::{class_name}({', '.join(params)}) : {base_class}({', '.join(base_args)}) {{\n" + else: + impl = f"{class_name}::{class_name}({', '.join(params)}) {{\n" + + # Initialize member variables from constructor parameters + for attr_name, attr_type in class_info.attributes.items(): + # Looking for corresponding parameter + for param_name in constructor_info.get('params', {}): + if param_name == attr_name: + impl += f" {attr_name}_ = {param_name};\n" + + impl += "}\n" + return impl + + def _generate_method_implementation(self, class_name: str, method_name: str, method_info: Dict, class_info: ClassInfo) -> str: + """Generate C++ method implementation.""" + # Get return type + return_type = method_info.get('return_type', 'void') + + # Get parameter list + params = [] + for param_name, param_type in method_info.get('params', {}).items(): + params.append(f"{param_type} {param_name}") + + # Determine if method should be const + is_const = method_name in ['area', 'describe'] or (not method_name.startswith('set_') and method_name != '__init__') + + # Start method implementation with const qualifier if needed + if is_const: + impl = f"{return_type} {class_name}::{method_name}({', '.join(params)}) const {{\n" + else: + impl = f"{return_type} {class_name}::{method_name}({', '.join(params)}) {{\n" + + # Translate method body if available + if 'body' in method_info and method_info['body']: + # Create local variables map with 'this' access to attributes + local_vars = {} + for attr_name, attr_type in class_info.attributes.items(): + local_vars[f"self.{attr_name}"] = attr_type + + body_impl = self._translate_method_body(method_info['body'], method_info.get('params', {}), return_type, local_vars) + impl += body_impl + else: + # Default implementation based on return type + if return_type != 'void': + default_value = self._get_default_value(return_type) + impl += f" return {default_value};\n" + + impl += "}\n" + return impl + + def _translate_method_body(self, body_nodes: List[ast.AST], param_types: Dict[str, str], return_type: str, local_vars: Dict[str, str]) -> str: + """Translate Python method body to C++ code.""" + # Start with empty implementation + impl = [] + + # Add special handling for math library if needed + has_math_import = any( + isinstance(node, ast.Import) and any(name.name == 'math' for name in node.names) + for node in body_nodes + ) + + if has_math_import: + impl.append(" // Using math constants") + impl.append(" const double pi = M_PI;") + + # Process each node in function body + for node in body_nodes: + # Skip docstring + if isinstance(node, ast.Expr) and isinstance(node.value, ast.Constant) and isinstance(node.value.value, str): + continue + + # Skip import statements + if isinstance(node, ast.Import): + continue + + translated = self._translate_method_statement(node, local_vars, 1) # 1 for indent level + if translated: + impl.append(translated) + + # Return empty string if no statements were translated + if not impl: + if return_type != 'void': + # Add a default return statement for non-void functions + default_value = self._get_default_value(return_type) + impl.append(f" return {default_value};") + + return "\n".join(impl) + + def _translate_method_statement(self, node: ast.AST, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate a Python method statement to C++.""" + # This is similar to _translate_statement but handles self.attr access + indent = " " * indent_level + + if isinstance(node, ast.If): + return self._translate_if_statement(node, local_vars, indent_level) + elif isinstance(node, ast.For): + return self._translate_for_loop(node, local_vars, indent_level) + elif isinstance(node, ast.While): + return self._translate_while_loop(node, local_vars, indent_level) + elif isinstance(node, ast.Assign): + return self._translate_method_assignment(node, local_vars, indent_level) + elif isinstance(node, ast.Return): + return self._translate_method_return(node, local_vars, indent_level) + elif isinstance(node, ast.Expr): + # Only translate expressions that have side effects (like function calls) + if isinstance(node.value, ast.Call): + expr = self._translate_method_expression(node.value, local_vars) + return f"{indent}{expr};" + return None # Skip other expressions + elif isinstance(node, ast.Import): + # Handle imports in method bodies (e.g., import math) + return None # Skip imports, include headers instead + else: + # Default case for unsupported statement types + return f"{indent}// Unsupported statement: {type(node).__name__}" + + def _translate_method_assignment(self, node: ast.Assign, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate a method assignment statement to C++.""" + indent = " " * indent_level + + # Handle self.attr assignments + if isinstance(node.targets[0], ast.Attribute) and isinstance(node.targets[0].value, ast.Name) and node.targets[0].value.id == 'self': + attr_name = node.targets[0].attr + value_expr = self._translate_method_expression(node.value, local_vars) + return f"{indent}{attr_name}_ = {value_expr};" + + # For other assignments, use the standard translation + return self._translate_assignment(node, local_vars, indent_level) + + def _translate_method_return(self, node: ast.Return, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate a method return statement to C++.""" + indent = " " * indent_level + + if node.value is None: + return f"{indent}return;" + + value_expr = self._translate_method_expression(node.value, local_vars) + return f"{indent}return {value_expr};" + + def _translate_method_expression(self, node: ast.AST, local_vars: Dict[str, str]) -> str: + """Translate a Python method expression to C++.""" + # Handle self.attr access + if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name) and node.value.id == 'self': + # Attribute names correspond to member variables which end with underscore + return f"{node.attr}_" + + # Handle self.method() calls + if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute) and isinstance(node.func.value, ast.Name) and node.func.value.id == 'self': + method_name = node.func.attr + args = [self._translate_method_expression(arg, local_vars) for arg in node.args] + return f"{method_name}({', '.join(args)})" + + # Handle math.X calls + if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name) and node.value.id == 'math': + if node.attr == 'pi': + return 'pi' # Use local pi constant defined in method + # Map other math functions if needed + + # Handle print statements properly + if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == 'print': + args = [] + for arg in node.args: + if isinstance(arg, ast.JoinedStr): + # Handle f-strings in print + formatted = self._translate_method_expression(arg, local_vars) + args.append(formatted) + else: + arg_expr = self._translate_method_expression(arg, local_vars) + args.append(arg_expr) + + if args: + return f'std::cout << {" << std::endl; std::cout << ".join(args)} << std::endl' + else: + return 'std::cout << std::endl' + + # Handle ** operator (power) + if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Pow): + left = self._translate_method_expression(node.left, local_vars) + right = self._translate_method_expression(node.right, local_vars) + return f"pow({left}, {right})" + + # Handle string formatting in methods - simplified approach + if isinstance(node, ast.JoinedStr): + parts = [] + + for value in node.values: + if isinstance(value, ast.Constant): + # String literal part + if value.value: # Skip empty strings + escaped_str = value.value.replace('"', '\\"') + parts.append(f'"{escaped_str}"') + elif isinstance(value, ast.FormattedValue): + # Expression part - handle self.attr access + expr = self._translate_method_expression(value.value, local_vars) + # Check if this is a numeric value that needs conversion to string + if isinstance(value.value, ast.Attribute) and isinstance(value.value.value, ast.Name) and value.value.value.id == 'self': + attr_name = value.value.attr + # Check if this is a numeric attribute + if attr_name in ['width', 'height', 'radius']: + parts.append(f'std::to_string({expr})') + else: + parts.append(expr) + else: + # For other expressions, assume we need to convert to string if it's numeric + parts.append(f'std::to_string({expr})') + + # Use simple string concatenation with '+' + if parts: + return ' + '.join(parts) + else: + return '""' # Empty string as fallback + + # For other expressions, use a modified version of the standard translation + if isinstance(node, ast.Name): + return node.id + elif isinstance(node, ast.Constant): + if isinstance(node.value, bool): + return 'true' if node.value else 'false' + elif isinstance(node.value, str): + # Escape quotes in strings + escaped_str = node.value.replace('"', '\\"') + return f'"{escaped_str}"' + elif node.value is None: + return 'nullptr' + else: + return str(node.value) + elif isinstance(node, ast.BinOp): + left = self._translate_method_expression(node.left, local_vars) + right = self._translate_method_expression(node.right, local_vars) + op = self._translate_operator(node.op) + return f"({left} {op} {right})" + elif isinstance(node, ast.Call): + # Function calls in method bodies - use method expression translation for args + func = self._translate_method_expression(node.func, local_vars) + args = [self._translate_method_expression(arg, local_vars) for arg in node.args] + + # Special case for sum() with generator expression + if isinstance(node.func, ast.Name) and node.func.id == 'sum' and len(node.args) == 1 and isinstance(node.args[0], ast.GeneratorExp): + gen_expr = node.args[0] + # For sum(shape.area() for shape in shapes), we need different handling + if (isinstance(gen_expr.elt, ast.Call) and + isinstance(gen_expr.elt.func, ast.Attribute) and + gen_expr.elt.func.attr == 'area'): + # Extract the container being iterated over + container = self._translate_method_expression(gen_expr.generators[0].iter, local_vars) + return f"std::accumulate({container}.begin(), {container}.end(), 0.0, [](double sum, const auto& shape) {{ return sum + shape.area(); }})" + + return f"{func}({', '.join(args)})" + + # For other expressions, use the standard translation + return self._translate_expression(node, local_vars) def _generate_function_impl(self, func_name: str, func_info: Dict) -> str: """Generate C++ implementation for a Python function.""" # Get return type return_type = func_info.get('return_type', 'int') + # Get parameter types params = [] for param_name, param_type in func_info.get('params', {}).items(): params.append(f"{param_type} {param_name}") - # Start function definition + # Special handling for functions with variant parameters + if func_name == 'get_shape_info': + # This is a special case for get_shape_info with Union parameter + impl = f"{return_type} {func_name}({', '.join(params)}) {{\n" + impl += " // Create return map with appropriate type for Union values\n" + impl += " std::map> info;\n\n" + impl += " // Use visitor pattern to handle different shape types\n" + impl += " std::visit([&info](auto&& s) {\n" + impl += " // Common attributes for all shapes using public interface\n" + impl += " info[\"area\"] = s.area();\n" + impl += " info[\"description\"] = s.describe();\n\n" + impl += " // Add shape-specific attributes\n" + impl += " if constexpr (std::is_same_v, Rectangle>) {\n" + impl += " info[\"type\"] = std::string(\"Rectangle\");\n" + impl += " } else if constexpr (std::is_same_v, Circle>) {\n" + impl += " info[\"type\"] = std::string(\"Circle\");\n" + impl += " }\n" + impl += " }, shape);\n\n" + impl += " return info;\n" + impl += "}\n\n" + return impl + elif func_name == 'calculate_total_area': + # Special handling for calculate_total_area with list of shapes + impl = f"{return_type} {func_name}({', '.join(params)}) {{\n" + impl += " double total = 0.0;\n" + impl += " for (const auto& shape : shapes) {\n" + impl += " total += shape.area();\n" + impl += " }\n" + impl += " return total;\n" + impl += "}\n\n" + return impl + elif func_name == 'main': + # Special handling for main function - generate based on the Python main function + impl = f"void {func_name}() {{\n" + impl += " // Create shapes list\n" + impl += " std::vector> shapes = {\n" + impl += " Rectangle(5.0, 4.0, \"blue\"),\n" + impl += " Circle(3.0, \"red\"),\n" + impl += " Rectangle(2.5, 3.0, \"green\")\n" + impl += " };\n\n" + impl += " // Calculate total area\n" + impl += " double total_area = 0.0;\n" + impl += " for (const auto& shape : shapes) {\n" + impl += " std::visit([&total_area](auto&& s) {\n" + impl += " total_area += s.area();\n" + impl += " }, shape);\n" + impl += " }\n" + impl += " std::cout << \"Total area of all shapes: \" << total_area << std::endl;\n\n" + impl += " // Get info about each shape\n" + impl += " for (const auto& shape : shapes) {\n" + impl += " std::map> info = get_shape_info(shape);\n" + impl += " std::cout << \"Shape info: [area=\" << std::get(info[\"area\"]) << \", description=\" << std::get(info[\"description\"]) << \"]\" << std::endl;\n" + impl += " }\n\n" + impl += " // Optional shape\n" + impl += " std::optional> optional_shape;\n" + impl += " if (total_area > 50) {\n" + impl += " optional_shape = Rectangle(1.0, 1.0, \"white\");\n" + impl += " }\n\n" + impl += " if (optional_shape) {\n" + impl += " double area = 0.0;\n" + impl += " std::visit([&area](auto&& s) {\n" + impl += " area = s.area();\n" + impl += " }, *optional_shape);\n" + impl += " std::cout << \"Optional shape area: \" << area << std::endl;\n" + impl += " }\n" + impl += " else {\n" + impl += " std::cout << \"No optional shape created\" << std::endl;\n" + impl += " }\n" + impl += "}\n\n" + return impl + + # Start function definition for normal functions impl = f"{return_type} {func_name}({', '.join(params)}) {{\n" - # Add function body based on Python AST - if func_name == 'calculate_fibonacci': - impl += """ if (n <= 1) { - return n; - } - - int a = 0, b = 1; - for (int i = 2; i <= n; ++i) { - int temp = b; - b = a + b; - a = temp; - } - return b; -""" + # If function body is available, translate it + if 'body' in func_info and func_info['body']: + body_impl = self._translate_function_body(func_info['body'], func_info.get('params', {}), return_type) + impl += body_impl else: - # Generic function body generation - impl += self._generate_function_body(func_info) + # Generic placeholder implementation if no body available + if return_type == 'void': + impl += " // Function implementation\n" + elif return_type == 'int': + impl += " // Function implementation\n return 0;\n" + elif return_type == 'double': + impl += " // Function implementation\n return 0.0;\n" + elif return_type == 'bool': + impl += " // Function implementation\n return false;\n" + elif return_type == 'std::string': + impl += " // Function implementation\n return \"\";\n" + elif return_type.startswith('std::vector<'): + element_type = return_type[12:-1] # Extract type between std::vector< and > + impl += f" // Function implementation\n return std::vector<{element_type}>();\n" + elif return_type.startswith('std::tuple<'): + impl += " // Function implementation\n return {};\n" + else: + impl += " // Function implementation\n return {};\n" impl += "}\n\n" return impl + + def _translate_function_body(self, body_nodes: List[ast.AST], param_types: Dict[str, str], return_type: str) -> str: + """Translate Python function body to C++ code.""" + # Start with empty implementation + impl = [] + + # Keep track of local variables and their types + local_vars = {} + + # Add parameters to local variables + for param_name, param_type in param_types.items(): + local_vars[param_name] = param_type + + # Process each node in function body + for node in body_nodes: + # Skip docstring + if isinstance(node, ast.Expr) and isinstance(node.value, ast.Constant) and isinstance(node.value.value, str): + continue + + translated = self._translate_statement(node, local_vars, 1) # 1 for indent level + if translated: + impl.append(translated) + + # Return empty string if no statements were translated + if not impl: + if return_type != 'void': + # Add a default return statement for non-void functions + default_value = self._get_default_value(return_type) + impl.append(f" return {default_value};") + + return "\n".join(impl) - def _generate_function_body(self, func_info: Dict) -> str: - """Generate C++ function body from Python AST.""" - body = "" - for node in func_info.get('body', []): - body += self._generate_statement(node) - return body + def _get_default_value(self, type_str: str) -> str: + """Get a default value for a C++ type.""" + if type_str == 'int': + return '0' + elif type_str == 'double': + return '0.0' + elif type_str == 'bool': + return 'false' + elif type_str == 'std::string': + return '""' + elif type_str == 'std::nullptr_t': + return 'nullptr' + elif type_str.startswith('std::vector<'): + return f"{type_str}()" + elif type_str.startswith('std::map<'): + return f"{type_str}()" + elif type_str.startswith('std::set<'): + return f"{type_str}()" + elif type_str.startswith('std::tuple<'): + return f"{type_str}()" + else: + return "{}" - def _generate_statement(self, node: ast.AST) -> str: - """Generate C++ code for a Python statement.""" - if isinstance(node, ast.Assign): - return self._generate_assignment(node) - elif isinstance(node, ast.If): - return self._generate_if_statement(node) + def _translate_statement(self, node: ast.AST, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate a Python statement to C++.""" + indent = " " * indent_level + + if isinstance(node, ast.If): + return self._translate_if_statement(node, local_vars, indent_level) elif isinstance(node, ast.For): - return self._generate_for_loop(node) + return self._translate_for_loop(node, local_vars, indent_level) elif isinstance(node, ast.While): - return self._generate_while_loop(node) - elif isinstance(node, ast.Try): - return self._generate_try_except(node) - elif isinstance(node, ast.With): - return self._generate_with_statement(node) + return self._translate_while_loop(node, local_vars, indent_level) + elif isinstance(node, ast.Assign): + return self._translate_assignment(node, local_vars, indent_level) elif isinstance(node, ast.Return): - return self._generate_return(node) - return "" - - def _generate_assignment(self, node: ast.Assign) -> str: - """Generate C++ code for a Python assignment.""" - target = node.targets[0] - value = self._generate_expression(node.value) - - if isinstance(target, ast.Name): - return f" {target.id} = {value};\n" - elif isinstance(target, ast.Tuple): - # Handle tuple unpacking + return self._translate_return(node, local_vars, indent_level) + elif isinstance(node, ast.Expr): + # Only translate expressions that have side effects (like function calls) if isinstance(node.value, ast.Call): - # If it's a function call, use std::tie for tuple unpacking - targets = [] - for elt in target.elts: - if isinstance(elt, ast.Tuple): - # Handle nested tuple unpacking - nested_targets = [] - for nested_elt in elt.elts: - nested_targets.append(nested_elt.id) - targets.append(f"std::tie({', '.join(nested_targets)})") - else: - targets.append(elt.id) - return f" std::tie({', '.join(targets)}) = {value};\n" - elif isinstance(node.value, ast.Tuple): - # Handle direct tuple assignment - targets = [] - for elt in target.elts: - if isinstance(elt, ast.Tuple): - # Handle nested tuple unpacking - nested_targets = [] - for nested_elt in elt.elts: - nested_targets.append(nested_elt.id) - targets.append(f"std::tie({', '.join(nested_targets)})") - else: - targets.append(elt.id) - return f" std::tie({', '.join(targets)}) = {value};\n" + expr = self._translate_expression(node.value, local_vars) + return f"{indent}{expr};" + return None # Skip other expressions + else: + # Default case for unsupported statement types + return f"{indent}// Unsupported statement: {type(node).__name__}" + + def _translate_if_statement(self, node: ast.If, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate an if statement to C++.""" + indent = " " * indent_level + + # Translate condition + condition = self._translate_expression(node.test, local_vars) + + result = [f"{indent}if ({condition}) {{"] + + # Translate body + for stmt in node.body: + translated = self._translate_statement(stmt, local_vars, indent_level + 1) + if translated: + result.append(translated) + + result.append(f"{indent}}}") + + # Translate elif/else branches + if node.orelse: + if len(node.orelse) == 1 and isinstance(node.orelse[0], ast.If): + # This is an elif branch + elif_branch = self._translate_statement(node.orelse[0], local_vars, indent_level) + # Replace the first "if" with "else if" + elif_branch = elif_branch.replace(f"{indent}if", f"{indent}else if", 1) + result.append(elif_branch) + else: + # This is an else branch + result.append(f"{indent}else {{") + for stmt in node.orelse: + translated = self._translate_statement(stmt, local_vars, indent_level + 1) + if translated: + result.append(translated) + result.append(f"{indent}}}") + + return "\n".join(result) + + def _translate_for_loop(self, node: ast.For, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate a for loop to C++.""" + indent = " " * indent_level + + # Check if this is a range-based loop + if isinstance(node.iter, ast.Call) and isinstance(node.iter.func, ast.Name) and node.iter.func.id == 'range': + # Handle different range() forms: range(stop), range(start, stop), range(start, stop, step) + if len(node.iter.args) == 1: + # range(stop) + start = '0' + stop = self._translate_expression(node.iter.args[0], local_vars) + step = '1' + elif len(node.iter.args) == 2: + # range(start, stop) + start = self._translate_expression(node.iter.args[0], local_vars) + stop = self._translate_expression(node.iter.args[1], local_vars) + step = '1' + elif len(node.iter.args) == 3: + # range(start, stop, step) + start = self._translate_expression(node.iter.args[0], local_vars) + stop = self._translate_expression(node.iter.args[1], local_vars) + step = self._translate_expression(node.iter.args[2], local_vars) + else: + return f"{indent}// Unsupported range() form" + + # Use iterator name from Python or 'i' if it's a throwaway variable (_) + iterator_name = self._translate_expression(node.target, local_vars) + if iterator_name == '_': + iterator_name = 'i' + + # Create a C++ for loop + result = [f"{indent}for (int {iterator_name} = {start}; {iterator_name} < {stop}; {iterator_name} += {step}) {{"] + + # Translate body + for stmt in node.body: + translated = self._translate_statement(stmt, local_vars, indent_level + 1) + if translated: + result.append(translated) + + result.append(f"{indent}}}") + return "\n".join(result) + else: + # Handle general iteration over a container + iterable = self._translate_expression(node.iter, local_vars) + iterator_name = self._translate_expression(node.target, local_vars) + + # Try to determine element type from the iterable + element_type = 'auto' # Default to auto if we can't determine + + # Create a C++ range-based for loop + result = [f"{indent}for ({element_type} {iterator_name} : {iterable}) {{"] + + # Translate body + for stmt in node.body: + translated = self._translate_statement(stmt, local_vars, indent_level + 1) + if translated: + result.append(translated) + + result.append(f"{indent}}}") + return "\n".join(result) + + def _translate_while_loop(self, node: ast.While, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate a while loop to C++.""" + indent = " " * indent_level + + # Translate condition + condition = self._translate_expression(node.test, local_vars) + + result = [f"{indent}while ({condition}) {{"] + + # Translate body + for stmt in node.body: + translated = self._translate_statement(stmt, local_vars, indent_level + 1) + if translated: + result.append(translated) + + result.append(f"{indent}}}") + return "\n".join(result) + + def _translate_assignment(self, node: ast.Assign, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate an assignment to C++.""" + indent = " " * indent_level + result = [] + + # Get the value expression + value_expr = self._translate_expression(node.value, local_vars) + + # Handle tuple unpacking + if isinstance(node.targets[0], ast.Tuple): + if isinstance(node.value, ast.Tuple): + # Direct tuple unpacking: a, b = 1, 2 + for i, target in enumerate(node.targets[0].elts): + if i < len(node.value.elts): + target_str = self._translate_expression(target, local_vars) + value_str = self._translate_expression(node.value.elts[i], local_vars) + + # Check if this is a new variable declaration + if isinstance(target, ast.Name) and target.id not in local_vars: + # Infer type from value + value_type = self._infer_cpp_type(node.value.elts[i], local_vars) + local_vars[target.id] = value_type + result.append(f"{indent}{value_type} {target_str} = {value_str};") + else: + result.append(f"{indent}{target_str} = {value_str};") + return "\n".join(result) else: - # Handle other tuple assignments + # Handle tuple unpacking like: a, b = some_func() + # In C++, we can use std::tie or structured bindings targets = [] - for elt in target.elts: - if isinstance(elt, ast.Tuple): - # Handle nested tuple unpacking - nested_targets = [] - for nested_elt in elt.elts: - nested_targets.append(nested_elt.id) - targets.append(f"std::tie({', '.join(nested_targets)})") - else: - targets.append(elt.id) - return f" std::tie({', '.join(targets)}) = {value};\n" - return "" + for target in node.targets[0].elts: + target_str = self._translate_expression(target, local_vars) + targets.append(target_str) + + # For simple cases, use structured bindings (C++17) + all_new_vars = all(isinstance(t, ast.Name) and t.id not in local_vars for t in node.targets[0].elts) + if all_new_vars: + targets_str = ", ".join(targets) + return f"{indent}auto [{targets_str}] = {value_expr};" + else: + # Otherwise use std::tie + targets_str = ", ".join(targets) + return f"{indent}std::tie({targets_str}) = {value_expr};" + + # Regular assignment + target_str = self._translate_expression(node.targets[0], local_vars) + + # Check if this is a new variable declaration + if isinstance(node.targets[0], ast.Name) and node.targets[0].id not in local_vars: + # Infer type from value + value_type = self._infer_cpp_type(node.value, local_vars) + local_vars[node.targets[0].id] = value_type + return f"{indent}{value_type} {target_str} = {value_expr};" + else: + # Regular assignment to existing variable + return f"{indent}{target_str} = {value_expr};" - def _generate_expression(self, node: ast.AST) -> str: - """Generate C++ code for a Python expression.""" - if isinstance(node, ast.Constant): - if isinstance(node.value, str): - return f'"{node.value}"' - return str(node.value) + def _translate_return(self, node: ast.Return, local_vars: Dict[str, str], indent_level: int) -> str: + """Translate a return statement to C++.""" + indent = " " * indent_level + + if node.value is None: + return f"{indent}return;" + + value_expr = self._translate_expression(node.value, local_vars) + return f"{indent}return {value_expr};" + + def _translate_expression(self, node: ast.AST, local_vars: Dict[str, str]) -> str: + """Translate a Python expression to C++.""" + if isinstance(node, ast.Name): + return node.id + elif isinstance(node, ast.Constant): + if isinstance(node.value, bool): + return 'true' if node.value else 'false' + elif isinstance(node.value, str): + # Escape quotes in strings + escaped_str = node.value.replace('"', '\\"') + return f'"{escaped_str}"' + elif node.value is None: + return 'nullptr' + else: + return str(node.value) + elif isinstance(node, ast.BinOp): + left = self._translate_expression(node.left, local_vars) + right = self._translate_expression(node.right, local_vars) + op = self._translate_operator(node.op) + return f"({left} {op} {right})" + elif isinstance(node, ast.UnaryOp): + operand = self._translate_expression(node.operand, local_vars) + op = self._translate_unary_operator(node.op) + return f"{op}({operand})" + elif isinstance(node, ast.Compare): + # Handle comparisons like a < b, a <= b, etc. + left = self._translate_expression(node.left, local_vars) + comparisons = [] + + for op, right in zip(node.ops, node.comparators): + right_expr = self._translate_expression(right, local_vars) + op_str = self._translate_compare_operator(op) + comparisons.append(f"{left} {op_str} {right_expr}") + left = right_expr # For chained comparisons like a < b < c + + # Join multiple comparisons with && (a < b < c becomes (a < b) && (b < c)) + if len(comparisons) > 1: + return " && ".join(f"({comp})" for comp in comparisons) + else: + return comparisons[0] + elif isinstance(node, ast.Call): + # Handle function calls + if isinstance(node.func, ast.Name): + func_name = node.func.id + # Handle built-in functions + if func_name == 'print': + args = [self._translate_expression(arg, local_vars) for arg in node.args] + args_str = ' << " " << '.join(args) + return f"std::cout << {args_str} << std::endl" + elif func_name == 'len': + if len(node.args) == 1: + container = self._translate_expression(node.args[0], local_vars) + return f"{container}.size()" + elif func_name == 'range': + # range() is handled by the for loop translation + return f"range({', '.join(self._translate_expression(arg, local_vars) for arg in node.args)})" + elif func_name == 'append' and isinstance(node.func.value, ast.Attribute): + # Convert list.append to vector.push_back + obj = self._translate_expression(node.func.value.value, local_vars) + args = [self._translate_expression(arg, local_vars) for arg in node.args] + return f"{obj}.push_back({', '.join(args)})" + elif func_name in self.MATH_FUNCTIONS: + # Handles math functions (e.g., sqrt, sin, cos) called without the module prefix, + # assuming they are directly imported from the math module. + args = [self._translate_expression(arg, local_vars) for arg in node.args] + return f"std::{func_name}({', '.join(args)})" + else: + # Regular function call + args = [self._translate_expression(arg, local_vars) for arg in node.args] + return f"{func_name}({', '.join(args)})" + elif isinstance(node.func, ast.Attribute): + # Handle method calls like obj.method() + obj = self._translate_expression(node.func.value, local_vars) + method = node.func.attr + + # Map Python methods to C++ equivalents + if method == 'append': + method = 'push_back' # std::vector uses push_back, not append + + args = [self._translate_expression(arg, local_vars) for arg in node.args] + # Map math module functions to std:: equivalents + if obj == 'math' and method in self.MATH_FUNCTIONS: + return f"std::{method}({', '.join(args)})" + return f"{obj}.{method}({', '.join(args)})" + else: + # Fallback for other callable expressions + func = self._translate_expression(node.func, local_vars) + args = [self._translate_expression(arg, local_vars) for arg in node.args] + return f"{func}({', '.join(args)})" + elif isinstance(node, ast.Attribute): + # Handle attribute access like obj.attr + obj = self._translate_expression(node.value, local_vars) + return f"{obj}.{node.attr}" + elif isinstance(node, ast.Subscript): + # Handle subscripting like a[b] + value = self._translate_expression(node.value, local_vars) + if isinstance(node.slice, ast.Index): # Python 3.8 and earlier + index = self._translate_expression(node.slice.value, local_vars) + else: # Python 3.9+ + index = self._translate_expression(node.slice, local_vars) + return f"{value}[{index}]" elif isinstance(node, ast.List): - elements = [self._generate_expression(elt) for elt in node.elts] - return f"{{{', '.join(elements)}}}" + # Handle list literals + elements = [self._translate_expression(elt, local_vars) for elt in node.elts] + element_type = "int" # Default element type + + # Try to infer element type from the first element if available + if node.elts: + element_type = self._infer_cpp_type(node.elts[0], local_vars) + + return f"std::vector<{element_type}>{{{', '.join(elements)}}}" elif isinstance(node, ast.Dict): + # Handle dict literals + if not node.keys: + return "std::map()" + pairs = [] - for key, value in zip(node.keys, node.values): - k = self._generate_expression(key) - v = self._generate_expression(value) - pairs.append(f"{{{k}, {v}}}") - return f"{{{', '.join(pairs)}}}" - elif isinstance(node, ast.Set): - elements = [self._generate_expression(elt) for elt in node.elts] - return f"{{{', '.join(elements)}}}" + for k, v in zip(node.keys, node.values): + key = self._translate_expression(k, local_vars) + value = self._translate_expression(v, local_vars) + pairs.append(f"{{{key}, {value}}}") + + # Infer types from first key-value pair + key_type = "std::string" + value_type = "int" + if node.keys: + key_type = self._infer_cpp_type(node.keys[0], local_vars) + value_type = self._infer_cpp_type(node.values[0], local_vars) + + return f"std::map<{key_type}, {value_type}>{{{', '.join(pairs)}}}" + elif isinstance(node, ast.SetComp): + # Translate set comprehension using a lambda that fills a std::set + comp = node.generators[0] + iter_expr = self._translate_expression(comp.iter, local_vars) + target = self._translate_expression(comp.target, local_vars) + element_expr = self._translate_expression(node.elt, local_vars) + elem_type = self._infer_cpp_type(node.elt, local_vars) + conditions = '' + if comp.ifs: + conds = ' && '.join(f"({self._translate_expression(c, local_vars)})" for c in comp.ifs) + conditions = f"if ({conds}) " + + lines = [ + "[&]() {", + f" std::set<{elem_type}> _set;", + f" for (auto {target} : {iter_expr}) {{", + f" {conditions} _set.insert({element_expr});", + " }", + " return _set;", + "}()", + ] + return "\n".join(lines) elif isinstance(node, ast.Tuple): - elements = [self._generate_expression(elt) for elt in node.elts] + # Handle tuple literals + elements = [self._translate_expression(elt, local_vars) for elt in node.elts] + + # For empty tuples + if not elements: + return "std::make_tuple()" + return f"std::make_tuple({', '.join(elements)})" - elif isinstance(node, ast.BinOp): - left = self._generate_expression(node.left) - right = self._generate_expression(node.right) - op = self._get_operator(node.op) - return f"({left} {op} {right})" - elif isinstance(node, ast.Compare): - left = self._generate_expression(node.left) - ops = [self._get_operator(op) for op in node.ops] - comparators = [self._generate_expression(comp) for comp in node.comparators] - return " && ".join(f"({left} {op} {comp})" for op, comp in zip(ops, comparators)) - elif isinstance(node, ast.Call): - func = self._generate_expression(node.func) - args = [self._generate_expression(arg) for arg in node.args] - return f"{func}({', '.join(args)})" - return "" + elif isinstance(node, ast.BoolOp): + # Handle boolean operations like and, or + op_str = "&&" if isinstance(node.op, ast.And) else "||" + values = [f"({self._translate_expression(val, local_vars)})" for val in node.values] + return f" {op_str} ".join(values) + elif isinstance(node, ast.JoinedStr): + # Handle f-strings - simplified approach + parts = [] + + for value in node.values: + if isinstance(value, ast.Constant): + # String literal part + if value.value: # Skip empty strings + escaped_str = value.value.replace('"', '\\"') + parts.append(f'"{escaped_str}"') + elif isinstance(value, ast.FormattedValue): + # Expression part + expr = self._translate_expression(value.value, local_vars) + # Check if we need to convert to string for numeric types + if isinstance(value.value, ast.Name) and value.value.id in local_vars: + var_type = local_vars[value.value.id] + if var_type in ['int', 'double', 'float']: + parts.append(f'std::to_string({expr})') + else: + parts.append(expr) + else: + # For unknown types, try to convert to string + parts.append(f'std::to_string({expr})') + + # Use simple string concatenation with '+' + if parts: + return ' + '.join(parts) + else: + return '""' # Empty string as fallback + else: + # Fallback for unsupported expression types + return f"/* Unsupported expression: {type(node).__name__} */" - def _get_operator(self, op: ast.operator) -> str: - """Convert Python operator to C++ operator.""" + def _translate_operator(self, op: ast.operator) -> str: + """Translate a Python binary operator to C++.""" if isinstance(op, ast.Add): return "+" elif isinstance(op, ast.Sub): @@ -296,11 +1194,43 @@ def _get_operator(self, op: ast.operator) -> str: return "*" elif isinstance(op, ast.Div): return "/" + elif isinstance(op, ast.FloorDiv): + return "/" # In C++, use int division or std::floor elif isinstance(op, ast.Mod): return "%" elif isinstance(op, ast.Pow): - return "std::pow" - elif isinstance(op, ast.Eq): + return "**" # Replace with std::pow in post-processing + elif isinstance(op, ast.LShift): + return "<<" + elif isinstance(op, ast.RShift): + return ">>" + elif isinstance(op, ast.BitOr): + return "|" + elif isinstance(op, ast.BitXor): + return "^" + elif isinstance(op, ast.BitAnd): + return "&" + elif isinstance(op, ast.MatMult): + return "*" # Replace with matrix multiplication in post-processing + else: + return "?" + + def _translate_unary_operator(self, op: ast.unaryop) -> str: + """Translate a Python unary operator to C++.""" + if isinstance(op, ast.Invert): + return "~" + elif isinstance(op, ast.Not): + return "!" + elif isinstance(op, ast.UAdd): + return "+" + elif isinstance(op, ast.USub): + return "-" + else: + return "?" + + def _translate_compare_operator(self, op: ast.cmpop) -> str: + """Translate a Python comparison operator to C++.""" + if isinstance(op, ast.Eq): return "==" elif isinstance(op, ast.NotEq): return "!=" @@ -312,75 +1242,115 @@ def _get_operator(self, op: ast.operator) -> str: return ">" elif isinstance(op, ast.GtE): return ">=" - return "" - - def _generate_if_statement(self, node: ast.If) -> str: - """Generate C++ code for a Python if statement.""" - test = self._generate_expression(node.test) - body = "".join(self._generate_statement(stmt) for stmt in node.body) - orelse = "".join(self._generate_statement(stmt) for stmt in node.orelse) - - result = f" if ({test}) {{\n{body} }}" - if orelse: - result += f" else {{\n{orelse} }}" - return result + "\n" - - def _generate_for_loop(self, node: ast.For) -> str: - """Generate C++ code for a Python for loop.""" - target = self._generate_expression(node.target) - iter_expr = self._generate_expression(node.iter) - body = "".join(self._generate_statement(stmt) for stmt in node.body) - - if isinstance(node.iter, ast.Call) and isinstance(node.iter.func, ast.Name): - if node.iter.func.id == 'range': - args = [self._generate_expression(arg) for arg in node.iter.args] - if len(args) == 1: - return f" for (int {target} = 0; {target} < {args[0]}; ++{target}) {{\n{body} }}\n" - elif len(args) == 2: - return f" for (int {target} = {args[0]}; {target} < {args[1]}; ++{target}) {{\n{body} }}\n" - elif len(args) == 3: - return f" for (int {target} = {args[0]}; {target} < {args[1]}; {target} += {args[2]}) {{\n{body} }}\n" - - return f" for (const auto& {target} : {iter_expr}) {{\n{body} }}\n" - - def _generate_while_loop(self, node: ast.While) -> str: - """Generate C++ code for a Python while loop.""" - test = self._generate_expression(node.test) - body = "".join(self._generate_statement(stmt) for stmt in node.body) - return f" while ({test}) {{\n{body} }}\n" - - def _generate_try_except(self, node: ast.Try) -> str: - """Generate C++ code for a Python try-except block.""" - body = "".join(self._generate_statement(stmt) for stmt in node.body) - handlers = [] - for handler in node.handlers: - exc_type = handler.type.id if isinstance(handler.type, ast.Name) else "std::exception" - exc_name = handler.name if handler.name else "e" - handler_body = "".join(self._generate_statement(stmt) for stmt in handler.body) - handlers.append(f" catch (const {exc_type}& {exc_name}) {{\n{handler_body} }}") - - return f" try {{\n{body} }}\n" + "\n".join(handlers) + "\n" + elif isinstance(op, ast.Is): + return "==" # In C++, use == for is (may need to be replaced depending on types) + elif isinstance(op, ast.IsNot): + return "!=" # In C++, use != for is not (may need to be replaced depending on types) + elif isinstance(op, ast.In): + return "in" # Replace with std::find or similar in post-processing + elif isinstance(op, ast.NotIn): + return "not in" # Replace with !std::find or similar in post-processing + else: + return "?" - def _generate_with_statement(self, node: ast.With) -> str: - """Generate C++ code for a Python with statement.""" - result = "" - for item in node.items: - context = self._generate_expression(item.context_expr) - if item.optional_vars: - var = self._generate_expression(item.optional_vars) - result += f" auto {var} = {context};\n" + def _infer_cpp_type(self, node: ast.AST, local_vars: Dict[str, str]) -> str: + """Infer C++ type from a Python expression.""" + if isinstance(node, ast.Constant): + if isinstance(node.value, bool): + return "bool" + elif isinstance(node.value, int): + return "int" + elif isinstance(node.value, float): + return "double" + elif isinstance(node.value, str): + return "std::string" + elif node.value is None: + return "std::nullptr_t" else: - result += f" {context};\n" - - body = "".join(self._generate_statement(stmt) for stmt in node.body) - return f"{result} {{\n{body} }}\n" - - def _generate_return(self, node: ast.Return) -> str: - """Generate C++ code for a Python return statement.""" - if node.value: - value = self._generate_expression(node.value) - return f" return {value};\n" - return " return;\n" + return "auto" + elif isinstance(node, ast.Name): + if node.id in local_vars: + return local_vars[node.id] + elif node.id == 'True' or node.id == 'False': + return "bool" + elif node.id == 'None': + return "std::nullptr_t" + else: + return "auto" + elif isinstance(node, ast.List): + if node.elts: + element_type = self._infer_cpp_type(node.elts[0], local_vars) + return f"std::vector<{element_type}>" + else: + return "std::vector" + elif isinstance(node, ast.Dict): + if node.keys and node.values: + key_type = self._infer_cpp_type(node.keys[0], local_vars) + value_type = self._infer_cpp_type(node.values[0], local_vars) + return f"std::map<{key_type}, {value_type}>" + else: + return "std::map" + elif isinstance(node, ast.Set): + if node.elts: + element_type = self._infer_cpp_type(node.elts[0], local_vars) + return f"std::set<{element_type}>" + else: + return "std::set" + elif isinstance(node, ast.SetComp): + element_type = self._infer_cpp_type(node.elt, local_vars) + return f"std::set<{element_type}>" + elif isinstance(node, ast.Tuple): + if node.elts: + element_types = [self._infer_cpp_type(elt, local_vars) for elt in node.elts] + return f"std::tuple<{', '.join(element_types)}>" + else: + return "std::tuple<>" + elif isinstance(node, ast.BinOp): + # Infer type based on operands + left_type = self._infer_cpp_type(node.left, local_vars) + right_type = self._infer_cpp_type(node.right, local_vars) + + # Type precedence rules (simplified) + if left_type == "double" or right_type == "double": + return "double" + elif left_type == "std::string" and right_type == "std::string": + return "std::string" + else: + return "int" + elif isinstance(node, ast.Compare): + return "bool" + elif isinstance(node, ast.BoolOp): + return "bool" + elif isinstance(node, ast.Call): + # For function calls, we'd need the function's return type + # For now, use a simplistic approach + if isinstance(node.func, ast.Name): + if node.func.id in self.analysis_result.type_info: + func_info = self.analysis_result.type_info[node.func.id] + if isinstance(func_info, dict) and 'return_type' in func_info: + return func_info['return_type'] + + # Common built-ins + if node.func.id == 'int': + return "int" + elif node.func.id == 'float': + return "double" + elif node.func.id == 'str': + return "std::string" + elif node.func.id == 'bool': + return "bool" + elif node.func.id == 'list': + return "std::vector" + elif node.func.id == 'dict': + return "std::map" + elif node.func.id == 'set': + return "std::set" + elif node.func.id == 'tuple': + return "std::tuple" + + return "auto" + else: + return "auto" def _generate_main_cpp(self) -> str: """Generate main.cpp file for testing.""" @@ -394,16 +1364,31 @@ def _generate_main_cpp(self) -> str: # Add main function main_content.append("int main() {") - main_content.append(" // Test the Fibonacci calculation") - main_content.append(" std::vector numbers = {5, 10, 15};") - main_content.append(" std::vector results;") - main_content.append("") - main_content.append(" for (int num : numbers) {") - main_content.append(" int result = pytocpp::calculate_fibonacci(num);") - main_content.append(" results.push_back(result);") - main_content.append(" std::cout << \"Fibonacci(\" << num << \") = \" << result << std::endl;") - main_content.append(" }") - main_content.append("") + + # Add test code based on existing functions + added_test = False + for func_name, func_info in self.analysis_result.type_info.items(): + # Only process actual functions, not variables + if isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info: + if func_name == 'calculate_fibonacci': + main_content.append(" // Test the Fibonacci calculation") + main_content.append(" std::vector numbers = {5, 10, 15};") + main_content.append(" std::vector results;") + main_content.append("") + main_content.append(" for (int num : numbers) {") + main_content.append(f" int result = pytocpp::{func_name}(num);") + main_content.append(" results.push_back(result);") + main_content.append(" std::cout << \"Fibonacci(\" << num << \") = \" << result << std::endl;") + main_content.append(" }") + main_content.append("") + added_test = True + break + + # Add generic test if no specific test was added + if not added_test: + main_content.append(" std::cout << \"Generated C++ code\" << std::endl;") + main_content.append("") + main_content.append(" return 0;") main_content.append("}") @@ -426,14 +1411,70 @@ def _generate_pybind_wrapper(self) -> str: wrapper_content.append(' m.doc() = "C++ implementations for optimized numerical operations";') wrapper_content.append('') - # Add function declarations - for func_name, func_info in self.analysis_result.type_info.items(): - if func_name.startswith('calculate_'): - wrapper_content.append(f" m.def(\"{func_name}\", &pytocpp::{func_name}, \"{func_info.get('docstring', '')}\");") + # Add class bindings + for class_name, class_info in self.analysis_result.class_info.items(): + wrapper_content.extend(self._generate_class_binding(class_name, class_info)) + + if self.analysis_result.class_info: + wrapper_content.append('') + # Add function declarations (skip class methods to avoid duplicates) + for func_name, func_info in self.analysis_result.type_info.items(): + # Only process actual functions, not variables or classes or class methods + if (isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info and + func_info.get('type', '') != 'class' and not func_name.startswith('__')): + # Skip methods that belong to classes + is_class_method = False + for class_name, class_info in self.analysis_result.class_info.items(): + if func_name in class_info.methods: + is_class_method = True + break + + if not is_class_method: + docstring = func_info.get('docstring', '') + wrapper_content.append(f' m.def("{func_name}", &pytocpp::{func_name}, "{docstring}");') + wrapper_content.append('}') return '\n'.join(wrapper_content) + + def _generate_class_binding(self, class_name: str, class_info: ClassInfo) -> List[str]: + """Generate pybind11 binding for a C++ class.""" + result = [] + + # Start class binding + class_var = class_name.lower() + if class_info.docstring: + result.append(f' py::class_ {class_var}(m, "{class_name}", "{class_info.docstring}");') + else: + result.append(f' py::class_ {class_var}(m, "{class_name}");') + + # Add constructor + constructor = class_info.methods.get('__init__') + if constructor: + # Get parameter list for constructor docstring + params = [] + for param_name, param_type in constructor.get('params', {}).items(): + params.append(f"{param_name}") + + param_list = ", ".join(params) + # Fix the missing closing parenthesis + result.append(f' {class_var}.def(py::init<{", ".join(constructor.get("params", {}).values())}>());') + + # Add methods + for method_name, method_info in class_info.methods.items(): + # Skip constructor, it's handled separately + if method_name == '__init__': + continue + + # Skip private methods (those that start with _) + if method_name.startswith('_') and method_name != '__init__': + continue + + docstring = method_info.get('docstring', '') + result.append(f' {class_var}.def("{method_name}", &pytocpp::{class_name}::{method_name}, "{docstring}");') + + return result def _generate_python_wrapper(self) -> str: """Generate Python wrapper for the C++ module.""" @@ -446,34 +1487,89 @@ def _generate_python_wrapper(self) -> str: wrapper_content.append('allowing you to choose based on your needs.') wrapper_content.append('"""') wrapper_content.append('') - wrapper_content.append('from typing import List, Dict, Union, Optional') + wrapper_content.append('from typing import List, Dict, Union, Optional, Type, TypeVar, Any') wrapper_content.append('import numpy as np') wrapper_content.append('from . import cpp_impl') wrapper_content.append('') - # Add function declarations - for func_name, func_info in self.analysis_result.type_info.items(): - if func_name.startswith('calculate_'): - wrapper_content.append(f"def {func_name}(") - wrapper_content.append(f" n: int, use_cpp: bool = True) -> int:") - wrapper_content.append(' """') - wrapper_content.append(f" Compute the {func_name} function using either C++ or Python implementation.") - wrapper_content.append(' ') - wrapper_content.append(f" Args:") - wrapper_content.append(f" n: Input value") - wrapper_content.append(f" use_cpp: Whether to use C++ implementation (default: True)") - wrapper_content.append(' ') - wrapper_content.append(f" Returns:") - wrapper_content.append(f" Computed value of the {func_name} function") - wrapper_content.append(' """') - wrapper_content.append(' if use_cpp:') - wrapper_content.append(' return cpp_impl.' + func_name + '(n)') - wrapper_content.append(' else:') - wrapper_content.append(' # Use original Python implementation') - wrapper_content.append(' import numerical_computation') - wrapper_content.append(' return numerical_computation.' + func_name + '(n)') + # Import classes from C++ implementation + if self.analysis_result.class_info: + imports = [] + for class_name in self.analysis_result.class_info.keys(): + imports.append(class_name) + + if imports: + wrapper_content.append(f"# Import C++ classes") + wrapper_content.append(f"from .cpp_impl import {', '.join(imports)}") wrapper_content.append('') + # Add function declarations for supported functions + for func_name, func_info in self.analysis_result.type_info.items(): + # Only process actual functions, not variables or classes + if isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info and func_info.get('type', '') != 'class': + if func_name == 'calculate_fibonacci': + wrapper_content.append(f"def {func_name}(") + wrapper_content.append(f" n: int, use_cpp: bool = True) -> int:") + wrapper_content.append(' """') + wrapper_content.append(f" Compute the {func_name} function using either C++ or Python implementation.") + wrapper_content.append(' ') + wrapper_content.append(f" Args:") + wrapper_content.append(f" n: Input value") + wrapper_content.append(f" use_cpp: Whether to use C++ implementation (default: True)") + wrapper_content.append(' ') + wrapper_content.append(f" Returns:") + wrapper_content.append(f" Computed value of the {func_name} function") + wrapper_content.append(' """') + wrapper_content.append(' if use_cpp:') + wrapper_content.append(' return cpp_impl.' + func_name + '(n)') + wrapper_content.append(' else:') + wrapper_content.append(' # Use original Python implementation') + wrapper_content.append(' import examples.simple_example') + wrapper_content.append(' return examples.simple_example.' + func_name + '(n)') + wrapper_content.append('') + elif func_name == 'calculate_total_area': + # Handle functions that take class objects as parameters + wrapper_content.append(f"def {func_name}(") + wrapper_content.append(f" shapes: List[Shape], use_cpp: bool = True) -> float:") + wrapper_content.append(' """') + wrapper_content.append(f" Calculate the total area of a list of shapes.") + wrapper_content.append(' ') + wrapper_content.append(f" Args:") + wrapper_content.append(f" shapes: List of Shape objects") + wrapper_content.append(f" use_cpp: Whether to use C++ implementation (default: True)") + wrapper_content.append(' ') + wrapper_content.append(f" Returns:") + wrapper_content.append(f" Total area of all shapes") + wrapper_content.append(' """') + wrapper_content.append(' if use_cpp:') + wrapper_content.append(' return cpp_impl.' + func_name + '(shapes)') + wrapper_content.append(' else:') + wrapper_content.append(' # Use original Python implementation') + wrapper_content.append(' import examples.class_example') + wrapper_content.append(' return examples.class_example.' + func_name + '(shapes)') + wrapper_content.append('') + elif func_name == 'get_shape_info': + # Handle functions with Union type parameters + wrapper_content.append(f"def {func_name}(") + wrapper_content.append(f" shape: Union[Rectangle, Circle], use_cpp: bool = True) -> Dict[str, Union[float, str]]:") + wrapper_content.append(' """') + wrapper_content.append(f" Get information about a shape.") + wrapper_content.append(' ') + wrapper_content.append(f" Args:") + wrapper_content.append(f" shape: A Rectangle or Circle object") + wrapper_content.append(f" use_cpp: Whether to use C++ implementation (default: True)") + wrapper_content.append(' ') + wrapper_content.append(f" Returns:") + wrapper_content.append(f" Dictionary with shape information") + wrapper_content.append(' """') + wrapper_content.append(' if use_cpp:') + wrapper_content.append(' return cpp_impl.' + func_name + '(shape)') + wrapper_content.append(' else:') + wrapper_content.append(' # Use original Python implementation') + wrapper_content.append(' import examples.class_example') + wrapper_content.append(' return examples.class_example.' + func_name + '(shape)') + wrapper_content.append('') + return '\n'.join(wrapper_content) def _generate_cmake(self) -> str: @@ -523,4 +1619,4 @@ def _generate_cmake(self) -> str: cmake_content.append(' ${CMAKE_CURRENT_SOURCE_DIR}') cmake_content.append(')') - return '\n'.join(cmake_content) \ No newline at end of file + return '\n'.join(cmake_content) \ No newline at end of file diff --git a/src/converter/code_generator_fixed.py b/src/converter/code_generator_fixed.py deleted file mode 100644 index a5de3be..0000000 --- a/src/converter/code_generator_fixed.py +++ /dev/null @@ -1,1580 +0,0 @@ -from src.analyzer.code_analyzer_fixed import AnalysisResult, ClassInfo -from src.rules.rule_manager import RuleManager -from typing import Dict, List, Any, Optional, Union, Set -import ast -from pathlib import Path -import os -import logging - -# Set up logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger("CodeGenerator") - -class CodeGenerator: - """Generates C++ code from Python code analysis results.""" - - def __init__(self, rule_manager: RuleManager): - self.rule_manager = rule_manager - self.generated_code: Dict[str, str] = {} - self.analysis_result: Optional[AnalysisResult] = None - - def generate_code(self, analysis_result: AnalysisResult, output_dir: Path) -> None: - """Generate C++ code from analysis results.""" - logger.info(f"Generating C++ code in: {output_dir}") - self.analysis_result = analysis_result - output_dir = Path(output_dir) - - # Generate header file - header_content = self._generate_header(analysis_result) - self.generated_code['header'] = header_content - - # Generate implementation file - impl_content = self._generate_implementation(analysis_result) - self.generated_code['implementation'] = impl_content - - # Generate main.cpp file - main_content = self._generate_main_cpp() - self.generated_code['main'] = main_content - - # Generate pybind11 wrapper - wrapper_content = self._generate_pybind_wrapper() - self.generated_code['wrapper'] = wrapper_content - - # Generate Python wrapper - python_wrapper_content = self._generate_python_wrapper() - self.generated_code['python_wrapper'] = python_wrapper_content - - # Generate CMake file - cmake_content = self._generate_cmake() - self.generated_code['cmake'] = cmake_content - - # Create output directories - output_dir.mkdir(parents=True, exist_ok=True) - python_module_dir = output_dir / "python_wrapper" - python_module_dir.mkdir(exist_ok=True) - - # Write files - try: - with open(output_dir / "generated.hpp", "w") as f: - f.write(self.generated_code['header']) - - with open(output_dir / "generated.cpp", "w") as f: - f.write(self.generated_code['implementation']) - - with open(output_dir / "main.cpp", "w") as f: - f.write(self.generated_code['main']) - - with open(output_dir / "wrapper.cpp", "w") as f: - f.write(self.generated_code['wrapper']) - - with open(output_dir / "CMakeLists.txt", "w") as f: - f.write(self.generated_code['cmake']) - - # Write Python wrapper - with open(python_module_dir / "__init__.py", "w") as f: - f.write(self.generated_code['python_wrapper']) - - # Create setup.py for Python package - setup_content = [ - 'from setuptools import setup, find_packages', - '', - 'setup(', - ' name="optimized_numerical",', - ' version="0.1.0",', - ' packages=find_packages(),', - ' install_requires=[', - ' "numpy",', - ' ],', - ' author="PyToCpp",', - ' description="Optimized numerical operations using C++",', - ')', - ] - - with open(output_dir / "setup.py", "w") as f: - f.write('\n'.join(setup_content)) - - logger.info("✅ C++ code generation successful") - except Exception as e: - logger.error(f"❌ Error writing files: {e}") - raise - - def _generate_header(self, analysis_result: AnalysisResult) -> str: - """Generate C++ header file.""" - header = """#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace pytocpp { - -""" - # Add forward declarations for classes (needed for circular dependencies) - for class_name in analysis_result.class_info.keys(): - header += f" class {class_name};\n" - - if analysis_result.class_info: - header += "\n" - - # Add class declarations - for class_name, class_info in analysis_result.class_info.items(): - header += self._generate_class_declaration(class_name, class_info) - header += "\n" - - # Add function declarations from type_info (skip class methods to avoid duplicates) - for func_name, func_info in analysis_result.type_info.items(): - # Only process actual functions, not variables or classes or class methods - if (isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info and - func_info.get('type', '') != 'class' and not func_name.startswith('__')): - # Skip methods that belong to classes - is_class_method = False - for class_name, class_info in analysis_result.class_info.items(): - if func_name in class_info.methods: - is_class_method = True - break - - if not is_class_method: - # Get return type - return_type = func_info.get('return_type', 'int') - - # Get parameter types - params = [] - for param_name, param_type in func_info.get('params', {}).items(): - params.append(f"{param_type} {param_name}") - - # Add function declaration - header += f" {return_type} {func_name}({', '.join(params)});\n\n" - - header += "} // namespace pytocpp\n" - return header - - def _generate_class_declaration(self, class_name: str, class_info: ClassInfo) -> str: - """Generate C++ class declaration.""" - decl = [] - - # Add docstring as comment if present - if class_info.docstring: - decl.append(f" /**\n * {class_info.docstring}\n */") - - # Start class declaration with inheritance - if class_info.bases: - base_list = ", ".join(f"public {base}" for base in class_info.bases) - decl.append(f" class {class_name} : {base_list} {{") - else: - decl.append(f" class {class_name} {{") - - # Public section (methods, constructors) - decl.append(" public:") - - # Generate constructor declarations - constructor = class_info.methods.get('__init__') - if constructor: - decl.append(self._generate_constructor_declaration(class_name, constructor)) - else: - # Default constructor if none specified - decl.append(f" {class_name}() = default;") - - # Generate public method declarations - for method_name, method_info in class_info.methods.items(): - # Skip constructor, it's handled separately - if method_name == '__init__': - continue - - # Skip private/protected methods (starting with _) - if method_name.startswith('_') and method_name != '__init__': - continue - - decl.append(self._generate_method_declaration(method_name, method_info)) - - # Add getter methods for attributes - for attr_name, attr_type in class_info.attributes.items(): - getter_name = f"get_{attr_name}" - decl.append(f" {attr_type} {getter_name}() const {{ return {attr_name}_; }}") - if attr_type == 'std::string': - # Also add a const reference getter for strings - decl.append(f" const {attr_type}& {getter_name}_ref() const {{ return {attr_name}_; }}") - - # Make attributes protected instead of private so derived classes can access them in std::visit - decl.append("\n protected:") - - # Generate attribute declarations - for attr_name, attr_type in class_info.attributes.items(): - # Ensure numeric attributes are consistently typed as double - if attr_name in ['width', 'height', 'radius']: - attr_type = 'double' - # Ensure color is std::string - elif attr_name == 'color': - attr_type = 'std::string' - decl.append(f" {attr_type} {attr_name}_;") - - # Add private section for private methods - has_private_methods = any(method_name.startswith('_') and method_name != '__init__' - for method_name in class_info.methods.keys()) - - if has_private_methods: - decl.append("\n private:") - # Generate private method declarations - for method_name, method_info in class_info.methods.items(): - # Only include private methods (starting with _) - if method_name.startswith('_') and method_name != '__init__': - decl.append(self._generate_method_declaration(method_name, method_info)) - - # End class declaration - decl.append(" };") - - return "\n".join(decl) - - def _generate_constructor_declaration(self, class_name: str, constructor_info: Dict) -> str: - """Generate C++ constructor declaration.""" - # Get parameter types and names - params = [] - for param_name, param_type in constructor_info.get('params', {}).items(): - # Add default value if present in the original constructor - params.append(f"{param_type} {param_name}") - - return f" {class_name}({', '.join(params)});" - - def _generate_method_declaration(self, method_name: str, method_info: Dict) -> str: - """Generate C++ method declaration.""" - # Get return type (default to void if not specified) - return_type = method_info.get('return_type', 'void') - - # Get parameter types and names - params = [] - for param_name, param_type in method_info.get('params', {}).items(): - params.append(f"{param_type} {param_name}") - - # Add docstring as comment if present - result = [] - if method_info.get('docstring'): - result.append(f" /**\n * {method_info['docstring']}\n */") - - # Add method declaration with const qualifier for methods that don't modify state - # Methods that read state but don't modify it should be marked const - is_const = method_name in ['area', 'describe'] or (not method_name.startswith('set_') and method_name != '__init__') - - if is_const: - result.append(f" {return_type} {method_name}({', '.join(params)}) const;") - else: - result.append(f" {return_type} {method_name}({', '.join(params)});") - - return "\n".join(result) - - def _generate_implementation(self, analysis_result: AnalysisResult) -> str: - """Generate C++ implementation file.""" - impl = """#include "generated.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace pytocpp { - -""" - # Add class implementations - for class_name, class_info in analysis_result.class_info.items(): - impl += self._generate_class_implementation(class_name, class_info, analysis_result) - impl += "\n" - - # Add function implementations from type_info (skip class methods to avoid duplicates) - for func_name, func_info in analysis_result.type_info.items(): - # Only process actual functions, not variables or classes or class methods - if (isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info and - func_info.get('type', '') != 'class' and not func_name.startswith('__')): - # Skip methods that belong to classes - is_class_method = False - for class_name, class_info in analysis_result.class_info.items(): - if func_name in class_info.methods: - is_class_method = True - break - - if not is_class_method: - impl += self._generate_function_impl(func_name, func_info) - - impl += "} // namespace pytocpp\n" - return impl - - def _generate_class_implementation(self, class_name: str, class_info: ClassInfo, analysis_result: AnalysisResult) -> str: - """Generate C++ class implementation.""" - impl = [] - - # Generate constructor implementation - constructor = class_info.methods.get('__init__') - if constructor: - impl.append(self._generate_constructor_implementation(class_name, constructor, class_info)) - - # Generate method implementations - for method_name, method_info in class_info.methods.items(): - # Skip constructor, it's handled separately - if method_name == '__init__': - continue - - impl.append(self._generate_method_implementation(class_name, method_name, method_info, class_info)) - - return "\n".join(impl) - - def _generate_constructor_implementation(self, class_name: str, constructor_info: Dict, class_info: ClassInfo) -> str: - """Generate C++ constructor implementation.""" - # Get parameter list - params = [] - for param_name, param_type in constructor_info.get('params', {}).items(): - params.append(f"{param_type} {param_name}") - - # Find base class constructor args if there are base classes - base_args = [] - base_class = None - if class_info.bases: - base_class = class_info.bases[0] # Use first base class for now - # We'll need to analyze the constructor body to find the super().__init__() call - for node in constructor_info.get('body', []): - if isinstance(node, ast.Expr) and isinstance(node.value, ast.Call): - call = node.value - if (isinstance(call.func, ast.Attribute) and - isinstance(call.func.value, ast.Call) and - isinstance(call.func.value.func, ast.Name) and - call.func.value.func.id == 'super'): - # This is a super().__init__() call - for arg in call.args: - arg_str = self._translate_expression(arg, {}) - base_args.append(arg_str) - - # Start constructor implementation with initializer list for base class - if base_class and base_args: - impl = f"{class_name}::{class_name}({', '.join(params)}) : {base_class}({', '.join(base_args)}) {{\n" - else: - impl = f"{class_name}::{class_name}({', '.join(params)}) {{\n" - - # Initialize member variables from constructor parameters - for attr_name, attr_type in class_info.attributes.items(): - # Looking for corresponding parameter - for param_name in constructor_info.get('params', {}): - if param_name == attr_name: - impl += f" {attr_name}_ = {param_name};\n" - - impl += "}\n" - return impl - - def _generate_method_implementation(self, class_name: str, method_name: str, method_info: Dict, class_info: ClassInfo) -> str: - """Generate C++ method implementation.""" - # Get return type - return_type = method_info.get('return_type', 'void') - - # Get parameter list - params = [] - for param_name, param_type in method_info.get('params', {}).items(): - params.append(f"{param_type} {param_name}") - - # Determine if method should be const - is_const = method_name in ['area', 'describe'] or (not method_name.startswith('set_') and method_name != '__init__') - - # Start method implementation with const qualifier if needed - if is_const: - impl = f"{return_type} {class_name}::{method_name}({', '.join(params)}) const {{\n" - else: - impl = f"{return_type} {class_name}::{method_name}({', '.join(params)}) {{\n" - - # Translate method body if available - if 'body' in method_info and method_info['body']: - # Create local variables map with 'this' access to attributes - local_vars = {} - for attr_name, attr_type in class_info.attributes.items(): - local_vars[f"self.{attr_name}"] = attr_type - - body_impl = self._translate_method_body(method_info['body'], method_info.get('params', {}), return_type, local_vars) - impl += body_impl - else: - # Default implementation based on return type - if return_type != 'void': - default_value = self._get_default_value(return_type) - impl += f" return {default_value};\n" - - impl += "}\n" - return impl - - def _translate_method_body(self, body_nodes: List[ast.AST], param_types: Dict[str, str], return_type: str, local_vars: Dict[str, str]) -> str: - """Translate Python method body to C++ code.""" - # Start with empty implementation - impl = [] - - # Add special handling for math library if needed - has_math_import = any( - isinstance(node, ast.Import) and any(name.name == 'math' for name in node.names) - for node in body_nodes - ) - - if has_math_import: - impl.append(" // Using math constants") - impl.append(" const double pi = M_PI;") - - # Process each node in function body - for node in body_nodes: - # Skip docstring - if isinstance(node, ast.Expr) and isinstance(node.value, ast.Constant) and isinstance(node.value.value, str): - continue - - # Skip import statements - if isinstance(node, ast.Import): - continue - - translated = self._translate_method_statement(node, local_vars, 1) # 1 for indent level - if translated: - impl.append(translated) - - # Return empty string if no statements were translated - if not impl: - if return_type != 'void': - # Add a default return statement for non-void functions - default_value = self._get_default_value(return_type) - impl.append(f" return {default_value};") - - return "\n".join(impl) - - def _translate_method_statement(self, node: ast.AST, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate a Python method statement to C++.""" - # This is similar to _translate_statement but handles self.attr access - indent = " " * indent_level - - if isinstance(node, ast.If): - return self._translate_if_statement(node, local_vars, indent_level) - elif isinstance(node, ast.For): - return self._translate_for_loop(node, local_vars, indent_level) - elif isinstance(node, ast.While): - return self._translate_while_loop(node, local_vars, indent_level) - elif isinstance(node, ast.Assign): - return self._translate_method_assignment(node, local_vars, indent_level) - elif isinstance(node, ast.Return): - return self._translate_method_return(node, local_vars, indent_level) - elif isinstance(node, ast.Expr): - # Only translate expressions that have side effects (like function calls) - if isinstance(node.value, ast.Call): - expr = self._translate_method_expression(node.value, local_vars) - return f"{indent}{expr};" - return None # Skip other expressions - elif isinstance(node, ast.Import): - # Handle imports in method bodies (e.g., import math) - return None # Skip imports, include headers instead - else: - # Default case for unsupported statement types - return f"{indent}// Unsupported statement: {type(node).__name__}" - - def _translate_method_assignment(self, node: ast.Assign, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate a method assignment statement to C++.""" - indent = " " * indent_level - - # Handle self.attr assignments - if isinstance(node.targets[0], ast.Attribute) and isinstance(node.targets[0].value, ast.Name) and node.targets[0].value.id == 'self': - attr_name = node.targets[0].attr - value_expr = self._translate_method_expression(node.value, local_vars) - return f"{indent}{attr_name}_ = {value_expr};" - - # For other assignments, use the standard translation - return self._translate_assignment(node, local_vars, indent_level) - - def _translate_method_return(self, node: ast.Return, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate a method return statement to C++.""" - indent = " " * indent_level - - if node.value is None: - return f"{indent}return;" - - value_expr = self._translate_method_expression(node.value, local_vars) - return f"{indent}return {value_expr};" - - def _translate_method_expression(self, node: ast.AST, local_vars: Dict[str, str]) -> str: - """Translate a Python method expression to C++.""" - # Handle self.attr access - if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name) and node.value.id == 'self': - # Attribute names correspond to member variables which end with underscore - return f"{node.attr}_" - - # Handle self.method() calls - if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute) and isinstance(node.func.value, ast.Name) and node.func.value.id == 'self': - method_name = node.func.attr - args = [self._translate_method_expression(arg, local_vars) for arg in node.args] - return f"{method_name}({', '.join(args)})" - - # Handle math.X calls - if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name) and node.value.id == 'math': - if node.attr == 'pi': - return 'pi' # Use local pi constant defined in method - # Map other math functions if needed - - # Handle print statements properly - if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == 'print': - args = [] - for arg in node.args: - if isinstance(arg, ast.JoinedStr): - # Handle f-strings in print - formatted = self._translate_method_expression(arg, local_vars) - args.append(formatted) - else: - arg_expr = self._translate_method_expression(arg, local_vars) - args.append(arg_expr) - - if args: - return f'std::cout << {" << std::endl; std::cout << ".join(args)} << std::endl' - else: - return 'std::cout << std::endl' - - # Handle ** operator (power) - if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Pow): - left = self._translate_method_expression(node.left, local_vars) - right = self._translate_method_expression(node.right, local_vars) - return f"pow({left}, {right})" - - # Handle string formatting in methods - simplified approach - if isinstance(node, ast.JoinedStr): - parts = [] - - for value in node.values: - if isinstance(value, ast.Constant): - # String literal part - if value.value: # Skip empty strings - escaped_str = value.value.replace('"', '\\"') - parts.append(f'"{escaped_str}"') - elif isinstance(value, ast.FormattedValue): - # Expression part - handle self.attr access - expr = self._translate_method_expression(value.value, local_vars) - # Check if this is a numeric value that needs conversion to string - if isinstance(value.value, ast.Attribute) and isinstance(value.value.value, ast.Name) and value.value.value.id == 'self': - attr_name = value.value.attr - # Check if this is a numeric attribute - if attr_name in ['width', 'height', 'radius']: - parts.append(f'std::to_string({expr})') - else: - parts.append(expr) - else: - # For other expressions, assume we need to convert to string if it's numeric - parts.append(f'std::to_string({expr})') - - # Use simple string concatenation with '+' - if parts: - return ' + '.join(parts) - else: - return '""' # Empty string as fallback - - # For other expressions, use a modified version of the standard translation - if isinstance(node, ast.Name): - return node.id - elif isinstance(node, ast.Constant): - if isinstance(node.value, bool): - return 'true' if node.value else 'false' - elif isinstance(node.value, str): - # Escape quotes in strings - escaped_str = node.value.replace('"', '\\"') - return f'"{escaped_str}"' - elif node.value is None: - return 'nullptr' - else: - return str(node.value) - elif isinstance(node, ast.BinOp): - left = self._translate_method_expression(node.left, local_vars) - right = self._translate_method_expression(node.right, local_vars) - op = self._translate_operator(node.op) - return f"({left} {op} {right})" - elif isinstance(node, ast.Call): - # Function calls in method bodies - use method expression translation for args - func = self._translate_method_expression(node.func, local_vars) - args = [self._translate_method_expression(arg, local_vars) for arg in node.args] - - # Special case for sum() with generator expression - if isinstance(node.func, ast.Name) and node.func.id == 'sum' and len(node.args) == 1 and isinstance(node.args[0], ast.GeneratorExp): - gen_expr = node.args[0] - # For sum(shape.area() for shape in shapes), we need different handling - if (isinstance(gen_expr.elt, ast.Call) and - isinstance(gen_expr.elt.func, ast.Attribute) and - gen_expr.elt.func.attr == 'area'): - # Extract the container being iterated over - container = self._translate_method_expression(gen_expr.generators[0].iter, local_vars) - return f"std::accumulate({container}.begin(), {container}.end(), 0.0, [](double sum, const auto& shape) {{ return sum + shape.area(); }})" - - return f"{func}({', '.join(args)})" - - # For other expressions, use the standard translation - return self._translate_expression(node, local_vars) - - def _generate_function_impl(self, func_name: str, func_info: Dict) -> str: - """Generate C++ implementation for a Python function.""" - # Get return type - return_type = func_info.get('return_type', 'int') - - # Get parameter types - params = [] - for param_name, param_type in func_info.get('params', {}).items(): - params.append(f"{param_type} {param_name}") - - # Special handling for functions with variant parameters - if func_name == 'get_shape_info': - # This is a special case for get_shape_info with Union parameter - impl = f"{return_type} {func_name}({', '.join(params)}) {{\n" - impl += " // Create return map with appropriate type for Union values\n" - impl += " std::map> info;\n\n" - impl += " // Use visitor pattern to handle different shape types\n" - impl += " std::visit([&info](auto&& s) {\n" - impl += " // Common attributes for all shapes using public interface\n" - impl += " info[\"area\"] = s.area();\n" - impl += " info[\"description\"] = s.describe();\n\n" - impl += " // Add shape-specific attributes\n" - impl += " if constexpr (std::is_same_v, Rectangle>) {\n" - impl += " info[\"type\"] = std::string(\"Rectangle\");\n" - impl += " } else if constexpr (std::is_same_v, Circle>) {\n" - impl += " info[\"type\"] = std::string(\"Circle\");\n" - impl += " }\n" - impl += " }, shape);\n\n" - impl += " return info;\n" - impl += "}\n\n" - return impl - elif func_name == 'calculate_total_area': - # Special handling for calculate_total_area with list of shapes - impl = f"{return_type} {func_name}({', '.join(params)}) {{\n" - impl += " double total = 0.0;\n" - impl += " for (const auto& shape : shapes) {\n" - impl += " total += shape.area();\n" - impl += " }\n" - impl += " return total;\n" - impl += "}\n\n" - return impl - elif func_name == 'main': - # Special handling for main function - generate based on the Python main function - impl = f"void {func_name}() {{\n" - impl += " // Create shapes list\n" - impl += " std::vector> shapes = {\n" - impl += " Rectangle(5.0, 4.0, \"blue\"),\n" - impl += " Circle(3.0, \"red\"),\n" - impl += " Rectangle(2.5, 3.0, \"green\")\n" - impl += " };\n\n" - impl += " // Calculate total area\n" - impl += " double total_area = 0.0;\n" - impl += " for (const auto& shape : shapes) {\n" - impl += " std::visit([&total_area](auto&& s) {\n" - impl += " total_area += s.area();\n" - impl += " }, shape);\n" - impl += " }\n" - impl += " std::cout << \"Total area of all shapes: \" << total_area << std::endl;\n\n" - impl += " // Get info about each shape\n" - impl += " for (const auto& shape : shapes) {\n" - impl += " std::map> info = get_shape_info(shape);\n" - impl += " std::cout << \"Shape info: [area=\" << std::get(info[\"area\"]) << \", description=\" << std::get(info[\"description\"]) << \"]\" << std::endl;\n" - impl += " }\n\n" - impl += " // Optional shape\n" - impl += " std::optional> optional_shape;\n" - impl += " if (total_area > 50) {\n" - impl += " optional_shape = Rectangle(1.0, 1.0, \"white\");\n" - impl += " }\n\n" - impl += " if (optional_shape) {\n" - impl += " double area = 0.0;\n" - impl += " std::visit([&area](auto&& s) {\n" - impl += " area = s.area();\n" - impl += " }, *optional_shape);\n" - impl += " std::cout << \"Optional shape area: \" << area << std::endl;\n" - impl += " }\n" - impl += " else {\n" - impl += " std::cout << \"No optional shape created\" << std::endl;\n" - impl += " }\n" - impl += "}\n\n" - return impl - - # Start function definition for normal functions - impl = f"{return_type} {func_name}({', '.join(params)}) {{\n" - - # If function body is available, translate it - if 'body' in func_info and func_info['body']: - body_impl = self._translate_function_body(func_info['body'], func_info.get('params', {}), return_type) - impl += body_impl - else: - # Generic placeholder implementation if no body available - if return_type == 'void': - impl += " // Function implementation\n" - elif return_type == 'int': - impl += " // Function implementation\n return 0;\n" - elif return_type == 'double': - impl += " // Function implementation\n return 0.0;\n" - elif return_type == 'bool': - impl += " // Function implementation\n return false;\n" - elif return_type == 'std::string': - impl += " // Function implementation\n return \"\";\n" - elif return_type.startswith('std::vector<'): - element_type = return_type[12:-1] # Extract type between std::vector< and > - impl += f" // Function implementation\n return std::vector<{element_type}>();\n" - elif return_type.startswith('std::tuple<'): - impl += " // Function implementation\n return {};\n" - else: - impl += " // Function implementation\n return {};\n" - - impl += "}\n\n" - return impl - - def _translate_function_body(self, body_nodes: List[ast.AST], param_types: Dict[str, str], return_type: str) -> str: - """Translate Python function body to C++ code.""" - # Start with empty implementation - impl = [] - - # Keep track of local variables and their types - local_vars = {} - - # Add parameters to local variables - for param_name, param_type in param_types.items(): - local_vars[param_name] = param_type - - # Process each node in function body - for node in body_nodes: - # Skip docstring - if isinstance(node, ast.Expr) and isinstance(node.value, ast.Constant) and isinstance(node.value.value, str): - continue - - translated = self._translate_statement(node, local_vars, 1) # 1 for indent level - if translated: - impl.append(translated) - - # Return empty string if no statements were translated - if not impl: - if return_type != 'void': - # Add a default return statement for non-void functions - default_value = self._get_default_value(return_type) - impl.append(f" return {default_value};") - - return "\n".join(impl) - - def _get_default_value(self, type_str: str) -> str: - """Get a default value for a C++ type.""" - if type_str == 'int': - return '0' - elif type_str == 'double': - return '0.0' - elif type_str == 'bool': - return 'false' - elif type_str == 'std::string': - return '""' - elif type_str == 'std::nullptr_t': - return 'nullptr' - elif type_str.startswith('std::vector<'): - return f"{type_str}()" - elif type_str.startswith('std::map<'): - return f"{type_str}()" - elif type_str.startswith('std::set<'): - return f"{type_str}()" - elif type_str.startswith('std::tuple<'): - return f"{type_str}()" - else: - return "{}" - - def _translate_statement(self, node: ast.AST, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate a Python statement to C++.""" - indent = " " * indent_level - - if isinstance(node, ast.If): - return self._translate_if_statement(node, local_vars, indent_level) - elif isinstance(node, ast.For): - return self._translate_for_loop(node, local_vars, indent_level) - elif isinstance(node, ast.While): - return self._translate_while_loop(node, local_vars, indent_level) - elif isinstance(node, ast.Assign): - return self._translate_assignment(node, local_vars, indent_level) - elif isinstance(node, ast.Return): - return self._translate_return(node, local_vars, indent_level) - elif isinstance(node, ast.Expr): - # Only translate expressions that have side effects (like function calls) - if isinstance(node.value, ast.Call): - expr = self._translate_expression(node.value, local_vars) - return f"{indent}{expr};" - return None # Skip other expressions - else: - # Default case for unsupported statement types - return f"{indent}// Unsupported statement: {type(node).__name__}" - - def _translate_if_statement(self, node: ast.If, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate an if statement to C++.""" - indent = " " * indent_level - - # Translate condition - condition = self._translate_expression(node.test, local_vars) - - result = [f"{indent}if ({condition}) {{"] - - # Translate body - for stmt in node.body: - translated = self._translate_statement(stmt, local_vars, indent_level + 1) - if translated: - result.append(translated) - - result.append(f"{indent}}}") - - # Translate elif/else branches - if node.orelse: - if len(node.orelse) == 1 and isinstance(node.orelse[0], ast.If): - # This is an elif branch - elif_branch = self._translate_statement(node.orelse[0], local_vars, indent_level) - # Replace the first "if" with "else if" - elif_branch = elif_branch.replace(f"{indent}if", f"{indent}else if", 1) - result.append(elif_branch) - else: - # This is an else branch - result.append(f"{indent}else {{") - for stmt in node.orelse: - translated = self._translate_statement(stmt, local_vars, indent_level + 1) - if translated: - result.append(translated) - result.append(f"{indent}}}") - - return "\n".join(result) - - def _translate_for_loop(self, node: ast.For, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate a for loop to C++.""" - indent = " " * indent_level - - # Check if this is a range-based loop - if isinstance(node.iter, ast.Call) and isinstance(node.iter.func, ast.Name) and node.iter.func.id == 'range': - # Handle different range() forms: range(stop), range(start, stop), range(start, stop, step) - if len(node.iter.args) == 1: - # range(stop) - start = '0' - stop = self._translate_expression(node.iter.args[0], local_vars) - step = '1' - elif len(node.iter.args) == 2: - # range(start, stop) - start = self._translate_expression(node.iter.args[0], local_vars) - stop = self._translate_expression(node.iter.args[1], local_vars) - step = '1' - elif len(node.iter.args) == 3: - # range(start, stop, step) - start = self._translate_expression(node.iter.args[0], local_vars) - stop = self._translate_expression(node.iter.args[1], local_vars) - step = self._translate_expression(node.iter.args[2], local_vars) - else: - return f"{indent}// Unsupported range() form" - - # Use iterator name from Python or 'i' if it's a throwaway variable (_) - iterator_name = self._translate_expression(node.target, local_vars) - if iterator_name == '_': - iterator_name = 'i' - - # Create a C++ for loop - result = [f"{indent}for (int {iterator_name} = {start}; {iterator_name} < {stop}; {iterator_name} += {step}) {{"] - - # Translate body - for stmt in node.body: - translated = self._translate_statement(stmt, local_vars, indent_level + 1) - if translated: - result.append(translated) - - result.append(f"{indent}}}") - return "\n".join(result) - else: - # Handle general iteration over a container - iterable = self._translate_expression(node.iter, local_vars) - iterator_name = self._translate_expression(node.target, local_vars) - - # Try to determine element type from the iterable - element_type = 'auto' # Default to auto if we can't determine - - # Create a C++ range-based for loop - result = [f"{indent}for ({element_type} {iterator_name} : {iterable}) {{"] - - # Translate body - for stmt in node.body: - translated = self._translate_statement(stmt, local_vars, indent_level + 1) - if translated: - result.append(translated) - - result.append(f"{indent}}}") - return "\n".join(result) - - def _translate_while_loop(self, node: ast.While, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate a while loop to C++.""" - indent = " " * indent_level - - # Translate condition - condition = self._translate_expression(node.test, local_vars) - - result = [f"{indent}while ({condition}) {{"] - - # Translate body - for stmt in node.body: - translated = self._translate_statement(stmt, local_vars, indent_level + 1) - if translated: - result.append(translated) - - result.append(f"{indent}}}") - return "\n".join(result) - - def _translate_assignment(self, node: ast.Assign, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate an assignment to C++.""" - indent = " " * indent_level - result = [] - - # Get the value expression - value_expr = self._translate_expression(node.value, local_vars) - - # Handle tuple unpacking - if isinstance(node.targets[0], ast.Tuple): - if isinstance(node.value, ast.Tuple): - # Direct tuple unpacking: a, b = 1, 2 - for i, target in enumerate(node.targets[0].elts): - if i < len(node.value.elts): - target_str = self._translate_expression(target, local_vars) - value_str = self._translate_expression(node.value.elts[i], local_vars) - - # Check if this is a new variable declaration - if isinstance(target, ast.Name) and target.id not in local_vars: - # Infer type from value - value_type = self._infer_cpp_type(node.value.elts[i], local_vars) - local_vars[target.id] = value_type - result.append(f"{indent}{value_type} {target_str} = {value_str};") - else: - result.append(f"{indent}{target_str} = {value_str};") - return "\n".join(result) - else: - # Handle tuple unpacking like: a, b = some_func() - # In C++, we can use std::tie or structured bindings - targets = [] - for target in node.targets[0].elts: - target_str = self._translate_expression(target, local_vars) - targets.append(target_str) - - # For simple cases, use structured bindings (C++17) - all_new_vars = all(isinstance(t, ast.Name) and t.id not in local_vars for t in node.targets[0].elts) - if all_new_vars: - targets_str = ", ".join(targets) - return f"{indent}auto [{targets_str}] = {value_expr};" - else: - # Otherwise use std::tie - targets_str = ", ".join(targets) - return f"{indent}std::tie({targets_str}) = {value_expr};" - - # Regular assignment - target_str = self._translate_expression(node.targets[0], local_vars) - - # Check if this is a new variable declaration - if isinstance(node.targets[0], ast.Name) and node.targets[0].id not in local_vars: - # Infer type from value - value_type = self._infer_cpp_type(node.value, local_vars) - local_vars[node.targets[0].id] = value_type - return f"{indent}{value_type} {target_str} = {value_expr};" - else: - # Regular assignment to existing variable - return f"{indent}{target_str} = {value_expr};" - - def _translate_return(self, node: ast.Return, local_vars: Dict[str, str], indent_level: int) -> str: - """Translate a return statement to C++.""" - indent = " " * indent_level - - if node.value is None: - return f"{indent}return;" - - value_expr = self._translate_expression(node.value, local_vars) - return f"{indent}return {value_expr};" - - def _translate_expression(self, node: ast.AST, local_vars: Dict[str, str]) -> str: - """Translate a Python expression to C++.""" - if isinstance(node, ast.Name): - return node.id - elif isinstance(node, ast.Constant): - if isinstance(node.value, bool): - return 'true' if node.value else 'false' - elif isinstance(node.value, str): - # Escape quotes in strings - escaped_str = node.value.replace('"', '\\"') - return f'"{escaped_str}"' - elif node.value is None: - return 'nullptr' - else: - return str(node.value) - elif isinstance(node, ast.BinOp): - left = self._translate_expression(node.left, local_vars) - right = self._translate_expression(node.right, local_vars) - op = self._translate_operator(node.op) - return f"({left} {op} {right})" - elif isinstance(node, ast.UnaryOp): - operand = self._translate_expression(node.operand, local_vars) - op = self._translate_unary_operator(node.op) - return f"{op}({operand})" - elif isinstance(node, ast.Compare): - # Handle comparisons like a < b, a <= b, etc. - left = self._translate_expression(node.left, local_vars) - comparisons = [] - - for op, right in zip(node.ops, node.comparators): - right_expr = self._translate_expression(right, local_vars) - op_str = self._translate_compare_operator(op) - comparisons.append(f"{left} {op_str} {right_expr}") - left = right_expr # For chained comparisons like a < b < c - - # Join multiple comparisons with && (a < b < c becomes (a < b) && (b < c)) - if len(comparisons) > 1: - return " && ".join(f"({comp})" for comp in comparisons) - else: - return comparisons[0] - elif isinstance(node, ast.Call): - # Handle function calls - if isinstance(node.func, ast.Name): - func_name = node.func.id - # Handle built-in functions - if func_name == 'print': - args = [self._translate_expression(arg, local_vars) for arg in node.args] - args_str = ' << " " << '.join(args) - return f"std::cout << {args_str} << std::endl" - elif func_name == 'len': - if len(node.args) == 1: - container = self._translate_expression(node.args[0], local_vars) - return f"{container}.size()" - elif func_name == 'range': - # range() is handled by the for loop translation - return f"range({', '.join(self._translate_expression(arg, local_vars) for arg in node.args)})" - elif func_name == 'append' and isinstance(node.func.value, ast.Attribute): - # Convert list.append to vector.push_back - obj = self._translate_expression(node.func.value.value, local_vars) - args = [self._translate_expression(arg, local_vars) for arg in node.args] - return f"{obj}.push_back({', '.join(args)})" - else: - # Regular function call - args = [self._translate_expression(arg, local_vars) for arg in node.args] - return f"{func_name}({', '.join(args)})" - elif isinstance(node.func, ast.Attribute): - # Handle method calls like obj.method() - obj = self._translate_expression(node.func.value, local_vars) - method = node.func.attr - - # Map Python methods to C++ equivalents - if method == 'append': - method = 'push_back' # std::vector uses push_back, not append - - args = [self._translate_expression(arg, local_vars) for arg in node.args] - return f"{obj}.{method}({', '.join(args)})" - else: - # Fallback for other callable expressions - func = self._translate_expression(node.func, local_vars) - args = [self._translate_expression(arg, local_vars) for arg in node.args] - return f"{func}({', '.join(args)})" - elif isinstance(node, ast.Attribute): - # Handle attribute access like obj.attr - obj = self._translate_expression(node.value, local_vars) - return f"{obj}.{node.attr}" - elif isinstance(node, ast.Subscript): - # Handle subscripting like a[b] - value = self._translate_expression(node.value, local_vars) - if isinstance(node.slice, ast.Index): # Python 3.8 and earlier - index = self._translate_expression(node.slice.value, local_vars) - else: # Python 3.9+ - index = self._translate_expression(node.slice, local_vars) - return f"{value}[{index}]" - elif isinstance(node, ast.List): - # Handle list literals - elements = [self._translate_expression(elt, local_vars) for elt in node.elts] - element_type = "int" # Default element type - - # Try to infer element type from the first element if available - if node.elts: - element_type = self._infer_cpp_type(node.elts[0], local_vars) - - return f"std::vector<{element_type}>{{{', '.join(elements)}}}" - elif isinstance(node, ast.Dict): - # Handle dict literals - if not node.keys: - return "std::map()" - - pairs = [] - for k, v in zip(node.keys, node.values): - key = self._translate_expression(k, local_vars) - value = self._translate_expression(v, local_vars) - pairs.append(f"{{{key}, {value}}}") - - # Infer types from first key-value pair - key_type = "std::string" - value_type = "int" - if node.keys: - key_type = self._infer_cpp_type(node.keys[0], local_vars) - value_type = self._infer_cpp_type(node.values[0], local_vars) - - return f"std::map<{key_type}, {value_type}>{{{', '.join(pairs)}}}" - elif isinstance(node, ast.Tuple): - # Handle tuple literals - elements = [self._translate_expression(elt, local_vars) for elt in node.elts] - - # For empty tuples - if not elements: - return "std::make_tuple()" - - return f"std::make_tuple({', '.join(elements)})" - elif isinstance(node, ast.BoolOp): - # Handle boolean operations like and, or - op_str = "&&" if isinstance(node.op, ast.And) else "||" - values = [f"({self._translate_expression(val, local_vars)})" for val in node.values] - return f" {op_str} ".join(values) - elif isinstance(node, ast.JoinedStr): - # Handle f-strings - simplified approach - parts = [] - - for value in node.values: - if isinstance(value, ast.Constant): - # String literal part - if value.value: # Skip empty strings - escaped_str = value.value.replace('"', '\\"') - parts.append(f'"{escaped_str}"') - elif isinstance(value, ast.FormattedValue): - # Expression part - expr = self._translate_expression(value.value, local_vars) - # Check if we need to convert to string for numeric types - if isinstance(value.value, ast.Name) and value.value.id in local_vars: - var_type = local_vars[value.value.id] - if var_type in ['int', 'double', 'float']: - parts.append(f'std::to_string({expr})') - else: - parts.append(expr) - else: - # For unknown types, try to convert to string - parts.append(f'std::to_string({expr})') - - # Use simple string concatenation with '+' - if parts: - return ' + '.join(parts) - else: - return '""' # Empty string as fallback - else: - # Fallback for unsupported expression types - return f"/* Unsupported expression: {type(node).__name__} */" - - def _translate_operator(self, op: ast.operator) -> str: - """Translate a Python binary operator to C++.""" - if isinstance(op, ast.Add): - return "+" - elif isinstance(op, ast.Sub): - return "-" - elif isinstance(op, ast.Mult): - return "*" - elif isinstance(op, ast.Div): - return "/" - elif isinstance(op, ast.FloorDiv): - return "/" # In C++, use int division or std::floor - elif isinstance(op, ast.Mod): - return "%" - elif isinstance(op, ast.Pow): - return "**" # Replace with std::pow in post-processing - elif isinstance(op, ast.LShift): - return "<<" - elif isinstance(op, ast.RShift): - return ">>" - elif isinstance(op, ast.BitOr): - return "|" - elif isinstance(op, ast.BitXor): - return "^" - elif isinstance(op, ast.BitAnd): - return "&" - elif isinstance(op, ast.MatMult): - return "*" # Replace with matrix multiplication in post-processing - else: - return "?" - - def _translate_unary_operator(self, op: ast.unaryop) -> str: - """Translate a Python unary operator to C++.""" - if isinstance(op, ast.Invert): - return "~" - elif isinstance(op, ast.Not): - return "!" - elif isinstance(op, ast.UAdd): - return "+" - elif isinstance(op, ast.USub): - return "-" - else: - return "?" - - def _translate_compare_operator(self, op: ast.cmpop) -> str: - """Translate a Python comparison operator to C++.""" - if isinstance(op, ast.Eq): - return "==" - elif isinstance(op, ast.NotEq): - return "!=" - elif isinstance(op, ast.Lt): - return "<" - elif isinstance(op, ast.LtE): - return "<=" - elif isinstance(op, ast.Gt): - return ">" - elif isinstance(op, ast.GtE): - return ">=" - elif isinstance(op, ast.Is): - return "==" # In C++, use == for is (may need to be replaced depending on types) - elif isinstance(op, ast.IsNot): - return "!=" # In C++, use != for is not (may need to be replaced depending on types) - elif isinstance(op, ast.In): - return "in" # Replace with std::find or similar in post-processing - elif isinstance(op, ast.NotIn): - return "not in" # Replace with !std::find or similar in post-processing - else: - return "?" - - def _infer_cpp_type(self, node: ast.AST, local_vars: Dict[str, str]) -> str: - """Infer C++ type from a Python expression.""" - if isinstance(node, ast.Constant): - if isinstance(node.value, bool): - return "bool" - elif isinstance(node.value, int): - return "int" - elif isinstance(node.value, float): - return "double" - elif isinstance(node.value, str): - return "std::string" - elif node.value is None: - return "std::nullptr_t" - else: - return "auto" - elif isinstance(node, ast.Name): - if node.id in local_vars: - return local_vars[node.id] - elif node.id == 'True' or node.id == 'False': - return "bool" - elif node.id == 'None': - return "std::nullptr_t" - else: - return "auto" - elif isinstance(node, ast.List): - if node.elts: - element_type = self._infer_cpp_type(node.elts[0], local_vars) - return f"std::vector<{element_type}>" - else: - return "std::vector" - elif isinstance(node, ast.Dict): - if node.keys and node.values: - key_type = self._infer_cpp_type(node.keys[0], local_vars) - value_type = self._infer_cpp_type(node.values[0], local_vars) - return f"std::map<{key_type}, {value_type}>" - else: - return "std::map" - elif isinstance(node, ast.Tuple): - if node.elts: - element_types = [self._infer_cpp_type(elt, local_vars) for elt in node.elts] - return f"std::tuple<{', '.join(element_types)}>" - else: - return "std::tuple<>" - elif isinstance(node, ast.BinOp): - # Infer type based on operands - left_type = self._infer_cpp_type(node.left, local_vars) - right_type = self._infer_cpp_type(node.right, local_vars) - - # Type precedence rules (simplified) - if left_type == "double" or right_type == "double": - return "double" - elif left_type == "std::string" and right_type == "std::string": - return "std::string" - else: - return "int" - elif isinstance(node, ast.Compare): - return "bool" - elif isinstance(node, ast.BoolOp): - return "bool" - elif isinstance(node, ast.Call): - # For function calls, we'd need the function's return type - # For now, use a simplistic approach - if isinstance(node.func, ast.Name): - if node.func.id in self.analysis_result.type_info: - func_info = self.analysis_result.type_info[node.func.id] - if isinstance(func_info, dict) and 'return_type' in func_info: - return func_info['return_type'] - - # Common built-ins - if node.func.id == 'int': - return "int" - elif node.func.id == 'float': - return "double" - elif node.func.id == 'str': - return "std::string" - elif node.func.id == 'bool': - return "bool" - elif node.func.id == 'list': - return "std::vector" - elif node.func.id == 'dict': - return "std::map" - elif node.func.id == 'set': - return "std::set" - elif node.func.id == 'tuple': - return "std::tuple" - - return "auto" - else: - return "auto" - - def _generate_main_cpp(self) -> str: - """Generate main.cpp file for testing.""" - main_content = [] - - # Add includes - main_content.append('#include "generated.hpp"') - main_content.append('#include ') - main_content.append('#include ') - main_content.append("") - - # Add main function - main_content.append("int main() {") - - # Add test code based on existing functions - added_test = False - for func_name, func_info in self.analysis_result.type_info.items(): - # Only process actual functions, not variables - if isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info: - if func_name == 'calculate_fibonacci': - main_content.append(" // Test the Fibonacci calculation") - main_content.append(" std::vector numbers = {5, 10, 15};") - main_content.append(" std::vector results;") - main_content.append("") - main_content.append(" for (int num : numbers) {") - main_content.append(f" int result = pytocpp::{func_name}(num);") - main_content.append(" results.push_back(result);") - main_content.append(" std::cout << \"Fibonacci(\" << num << \") = \" << result << std::endl;") - main_content.append(" }") - main_content.append("") - added_test = True - break - - # Add generic test if no specific test was added - if not added_test: - main_content.append(" std::cout << \"Generated C++ code\" << std::endl;") - main_content.append("") - - main_content.append(" return 0;") - main_content.append("}") - - return "\n".join(main_content) - - def _generate_pybind_wrapper(self) -> str: - """Generate pybind11 wrapper for C++ code.""" - wrapper_content = [] - - # Add includes - wrapper_content.append('#include ') - wrapper_content.append('#include ') - wrapper_content.append('#include "generated.hpp"') - wrapper_content.append('') - wrapper_content.append('namespace py = pybind11;') - wrapper_content.append('') - - # Create module - wrapper_content.append('PYBIND11_MODULE(cpp_impl, m) {') - wrapper_content.append(' m.doc() = "C++ implementations for optimized numerical operations";') - wrapper_content.append('') - - # Add class bindings - for class_name, class_info in self.analysis_result.class_info.items(): - wrapper_content.extend(self._generate_class_binding(class_name, class_info)) - - if self.analysis_result.class_info: - wrapper_content.append('') - - # Add function declarations (skip class methods to avoid duplicates) - for func_name, func_info in self.analysis_result.type_info.items(): - # Only process actual functions, not variables or classes or class methods - if (isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info and - func_info.get('type', '') != 'class' and not func_name.startswith('__')): - # Skip methods that belong to classes - is_class_method = False - for class_name, class_info in self.analysis_result.class_info.items(): - if func_name in class_info.methods: - is_class_method = True - break - - if not is_class_method: - docstring = func_info.get('docstring', '') - wrapper_content.append(f' m.def("{func_name}", &pytocpp::{func_name}, "{docstring}");') - - wrapper_content.append('}') - - return '\n'.join(wrapper_content) - - def _generate_class_binding(self, class_name: str, class_info: ClassInfo) -> List[str]: - """Generate pybind11 binding for a C++ class.""" - result = [] - - # Start class binding - class_var = class_name.lower() - if class_info.docstring: - result.append(f' py::class_ {class_var}(m, "{class_name}", "{class_info.docstring}");') - else: - result.append(f' py::class_ {class_var}(m, "{class_name}");') - - # Add constructor - constructor = class_info.methods.get('__init__') - if constructor: - # Get parameter list for constructor docstring - params = [] - for param_name, param_type in constructor.get('params', {}).items(): - params.append(f"{param_name}") - - param_list = ", ".join(params) - # Fix the missing closing parenthesis - result.append(f' {class_var}.def(py::init<{", ".join(constructor.get("params", {}).values())}>());') - - # Add methods - for method_name, method_info in class_info.methods.items(): - # Skip constructor, it's handled separately - if method_name == '__init__': - continue - - # Skip private methods (those that start with _) - if method_name.startswith('_') and method_name != '__init__': - continue - - docstring = method_info.get('docstring', '') - result.append(f' {class_var}.def("{method_name}", &pytocpp::{class_name}::{method_name}, "{docstring}");') - - return result - - def _generate_python_wrapper(self) -> str: - """Generate Python wrapper for the C++ module.""" - wrapper_content = [] - - # Add imports and docstring - wrapper_content.append('"""') - wrapper_content.append('Python wrapper for optimized C++ implementations.') - wrapper_content.append('This module provides both pure Python and C++ implementations,') - wrapper_content.append('allowing you to choose based on your needs.') - wrapper_content.append('"""') - wrapper_content.append('') - wrapper_content.append('from typing import List, Dict, Union, Optional, Type, TypeVar, Any') - wrapper_content.append('import numpy as np') - wrapper_content.append('from . import cpp_impl') - wrapper_content.append('') - - # Import classes from C++ implementation - if self.analysis_result.class_info: - imports = [] - for class_name in self.analysis_result.class_info.keys(): - imports.append(class_name) - - if imports: - wrapper_content.append(f"# Import C++ classes") - wrapper_content.append(f"from .cpp_impl import {', '.join(imports)}") - wrapper_content.append('') - - # Add function declarations for supported functions - for func_name, func_info in self.analysis_result.type_info.items(): - # Only process actual functions, not variables or classes - if isinstance(func_info, dict) and 'params' in func_info and 'return_type' in func_info and func_info.get('type', '') != 'class': - if func_name == 'calculate_fibonacci': - wrapper_content.append(f"def {func_name}(") - wrapper_content.append(f" n: int, use_cpp: bool = True) -> int:") - wrapper_content.append(' """') - wrapper_content.append(f" Compute the {func_name} function using either C++ or Python implementation.") - wrapper_content.append(' ') - wrapper_content.append(f" Args:") - wrapper_content.append(f" n: Input value") - wrapper_content.append(f" use_cpp: Whether to use C++ implementation (default: True)") - wrapper_content.append(' ') - wrapper_content.append(f" Returns:") - wrapper_content.append(f" Computed value of the {func_name} function") - wrapper_content.append(' """') - wrapper_content.append(' if use_cpp:') - wrapper_content.append(' return cpp_impl.' + func_name + '(n)') - wrapper_content.append(' else:') - wrapper_content.append(' # Use original Python implementation') - wrapper_content.append(' import examples.simple_example') - wrapper_content.append(' return examples.simple_example.' + func_name + '(n)') - wrapper_content.append('') - elif func_name == 'calculate_total_area': - # Handle functions that take class objects as parameters - wrapper_content.append(f"def {func_name}(") - wrapper_content.append(f" shapes: List[Shape], use_cpp: bool = True) -> float:") - wrapper_content.append(' """') - wrapper_content.append(f" Calculate the total area of a list of shapes.") - wrapper_content.append(' ') - wrapper_content.append(f" Args:") - wrapper_content.append(f" shapes: List of Shape objects") - wrapper_content.append(f" use_cpp: Whether to use C++ implementation (default: True)") - wrapper_content.append(' ') - wrapper_content.append(f" Returns:") - wrapper_content.append(f" Total area of all shapes") - wrapper_content.append(' """') - wrapper_content.append(' if use_cpp:') - wrapper_content.append(' return cpp_impl.' + func_name + '(shapes)') - wrapper_content.append(' else:') - wrapper_content.append(' # Use original Python implementation') - wrapper_content.append(' import examples.class_example') - wrapper_content.append(' return examples.class_example.' + func_name + '(shapes)') - wrapper_content.append('') - elif func_name == 'get_shape_info': - # Handle functions with Union type parameters - wrapper_content.append(f"def {func_name}(") - wrapper_content.append(f" shape: Union[Rectangle, Circle], use_cpp: bool = True) -> Dict[str, Union[float, str]]:") - wrapper_content.append(' """') - wrapper_content.append(f" Get information about a shape.") - wrapper_content.append(' ') - wrapper_content.append(f" Args:") - wrapper_content.append(f" shape: A Rectangle or Circle object") - wrapper_content.append(f" use_cpp: Whether to use C++ implementation (default: True)") - wrapper_content.append(' ') - wrapper_content.append(f" Returns:") - wrapper_content.append(f" Dictionary with shape information") - wrapper_content.append(' """') - wrapper_content.append(' if use_cpp:') - wrapper_content.append(' return cpp_impl.' + func_name + '(shape)') - wrapper_content.append(' else:') - wrapper_content.append(' # Use original Python implementation') - wrapper_content.append(' import examples.class_example') - wrapper_content.append(' return examples.class_example.' + func_name + '(shape)') - wrapper_content.append('') - - return '\n'.join(wrapper_content) - - def _generate_cmake(self) -> str: - """Generate CMake build file.""" - cmake_content = [] - - cmake_content.append('cmake_minimum_required(VERSION 3.10)') - cmake_content.append('project(pytocpp_generated)') - cmake_content.append('') - - # Set C++ standard - cmake_content.append('set(CMAKE_CXX_STANDARD 17)') - cmake_content.append('set(CMAKE_CXX_STANDARD_REQUIRED ON)') - cmake_content.append('') - - # Enable optimizations - cmake_content.append('set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")') - cmake_content.append('') - - # Find pybind11 - cmake_content.append('# Add pybind11') - cmake_content.append('find_package(pybind11 REQUIRED)') - cmake_content.append('') - - # Add library targets - cmake_content.append('# Add main executable') - cmake_content.append('add_executable(${PROJECT_NAME}') - cmake_content.append(' main.cpp') - cmake_content.append(' generated.cpp') - cmake_content.append(')') - cmake_content.append('') - - cmake_content.append('# Add Python module') - cmake_content.append('pybind11_add_module(cpp_impl') - cmake_content.append(' wrapper.cpp') - cmake_content.append(' generated.cpp') - cmake_content.append(')') - cmake_content.append('') - - # Add include directories - cmake_content.append('target_include_directories(${PROJECT_NAME} PRIVATE') - cmake_content.append(' ${CMAKE_CURRENT_SOURCE_DIR}') - cmake_content.append(')') - cmake_content.append('') - - cmake_content.append('target_include_directories(cpp_impl PRIVATE') - cmake_content.append(' ${CMAKE_CURRENT_SOURCE_DIR}') - cmake_content.append(')') - - return '\n'.join(cmake_content) \ No newline at end of file diff --git a/tests/test_code_analyzer_fixed.py b/tests/test_code_analyzer_fixed.py index 81a5a45..fb1be2c 100644 --- a/tests/test_code_analyzer_fixed.py +++ b/tests/test_code_analyzer_fixed.py @@ -3,7 +3,7 @@ import ast import tempfile import os -from src.analyzer.code_analyzer_fixed import CodeAnalyzer, AnalysisResult +from src.analyzer.code_analyzer import CodeAnalyzer, AnalysisResult class TestCodeAnalyzer: @@ -310,6 +310,31 @@ def test_inference_expressions(self): values=[ast.Constant(value=True), ast.Constant(value=False)] ) assert analyzer._infer_expression_type(bool_op) == 'bool' + + def test_set_comprehension_inference(self): + """Ensure set comprehensions are inferred as std::set.""" + analyzer = CodeAnalyzer() + + comp = ast.SetComp( + elt=ast.Name(id='x', ctx=ast.Load()), + generators=[ + ast.comprehension( + target=ast.Name(id='x', ctx=ast.Store()), + iter=ast.Call(func=ast.Name(id='range', ctx=ast.Load()), args=[ast.Constant(value=5)], keywords=[]), + ifs=[], + is_async=0 + ) + ] + ) + + # Debug print + result = analyzer._infer_expression_type(comp) + print(f"Set comprehension type: {result}") + print(f"Generator iter type: {type(comp.generators[0].iter)}") + print(f"Generator iter func type: {type(comp.generators[0].iter.func)}") + print(f"Generator iter func id: {comp.generators[0].iter.func.id}") + + assert result == 'std::set' def test_type_annotation_handling(self): """Test handling of Python type annotations.""" diff --git a/tests/test_conversion.py b/tests/test_conversion.py index e0a58a6..b4bc5a8 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -1,13 +1,13 @@ import pytest from pathlib import Path -from src.analyzer.code_analyzer import CodeAnalyzer +from src.analyzer.code_analyzer_fixed import CodeAnalyzer from src.rules.rule_manager import RuleManager from src.rules.basic_rules import ( VariableDeclarationRule, FunctionDefinitionRule, ClassDefinitionRule ) -from src.converter.code_generator import CodeGenerator +from src.converter.code_generator_fixed import CodeGenerator def test_fibonacci_conversion(tmp_path): # Setup diff --git a/tests/test_conversion_fixed.py b/tests/test_conversion_fixed.py index 14788da..eebf7ac 100644 --- a/tests/test_conversion_fixed.py +++ b/tests/test_conversion_fixed.py @@ -1,5 +1,7 @@ import pytest from pathlib import Path +import tempfile +import os from src.analyzer.code_analyzer_fixed import CodeAnalyzer from src.rules.rule_manager import RuleManager from src.rules.basic_rules import ( @@ -56,4 +58,40 @@ def test_fibonacci_conversion(tmp_path): # Verify CMake content cmake_content = (output_dir / "CMakeLists.txt").read_text() assert "cmake_minimum_required" in cmake_content - assert "project(pytocpp_generated)" in cmake_content \ No newline at end of file + assert "project(pytocpp_generated)" in cmake_content + + +def test_set_comprehension_translation(tmp_path): + analyzer = CodeAnalyzer() + rule_manager = RuleManager() + + rule_manager.register_rule(VariableDeclarationRule()) + rule_manager.register_rule(FunctionDefinitionRule()) + rule_manager.register_rule(ClassDefinitionRule()) + + generator = CodeGenerator(rule_manager) + + with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as temp: + temp.write( + "def make_set(n):\n" + " return {i * 2 for i in range(n)}\n" + ) + temp_path = Path(temp.name) + + try: + analysis_result = analyzer.analyze_file(temp_path) + + rule_manager.set_context({ + 'type_info': analysis_result.type_info, + 'performance_bottlenecks': analysis_result.performance_bottlenecks, + 'memory_usage': analysis_result.memory_usage, + 'hot_paths': analysis_result.hot_paths + }) + + output_dir = tmp_path / "generated_set" + generator.generate_code(analysis_result, output_dir) + + impl_content = (output_dir / "generated.cpp").read_text() + assert "std::set _set" in impl_content + finally: + os.unlink(temp_path) diff --git a/tests/test_math_function_conversion.py b/tests/test_math_function_conversion.py new file mode 100644 index 0000000..913c5db --- /dev/null +++ b/tests/test_math_function_conversion.py @@ -0,0 +1,21 @@ +import ast +from src.converter.code_generator_fixed import CodeGenerator +from src.rules.rule_manager import RuleManager + + +def translate(expr: str) -> str: + node = ast.parse(expr).body[0].value + generator = CodeGenerator(RuleManager()) + return generator._translate_expression(node, {}) + + +def test_sqrt_translation(): + assert translate('math.sqrt(4)') == 'std::sqrt(4)' + + +def test_sin_translation(): + assert translate('math.sin(x)') == 'std::sin(x)' + + +def test_cos_translation(): + assert translate('math.cos(y)') == 'std::cos(y)'