Skip to content

Commit 2d38305

Browse files
committed
Added dataclasses, load, and save functions + README.
1 parent 185e216 commit 2d38305

File tree

10 files changed

+787
-0
lines changed

10 files changed

+787
-0
lines changed

src/schemas/README.md

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# ACE Pipeline Schemas
2+
3+
This directory contains standardized schemas for all ACE pipeline stages, ensuring consistent data formats across different implementations.
4+
5+
## Structure
6+
7+
- **[`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md)** - Complete documentation of input/output formats for each stage
8+
- **Python Dataclasses** - Type-safe data structures for each stage:
9+
- [`experiment_schemas.py`](experiment_schemas.py) - Experiment and Domain (Stage 0)
10+
- [`metadata_schemas.py`](metadata_schemas.py) - Common metadata (PipelineMetadata)
11+
- [`area_schemas.py`](area_schemas.py) - Area generation (Stage 1)
12+
- [`capability_schemas.py`](capability_schemas.py) - Capability generation (Stage 2)
13+
- [`task_schemas.py`](task_schemas.py) - Task generation (Stage 3)
14+
- [`solution_schemas.py`](solution_schemas.py) - Solution generation (Stage 4)
15+
- [`validation_schemas.py`](validation_schemas.py) - Validation (Stage 5)
16+
- **I/O Utilities** - Save and load functions:
17+
- [`io_utils.py`](io_utils.py) - Save/load functions for all 7 stage outputs
18+
19+
## Usage
20+
21+
### Using Python Dataclasses
22+
23+
```python
24+
from src.schemas import (
25+
Experiment,
26+
Domain,
27+
PipelineMetadata,
28+
Area,
29+
Capability,
30+
Task,
31+
TaskSolution,
32+
ValidationResult,
33+
)
34+
35+
# Create area
36+
area = Area(
37+
name="Cash Flow & Budget Management",
38+
area_id="area_000",
39+
description="Design and monitor budgets...",
40+
domain="personal finance",
41+
domain_id="domain_000",
42+
# generation_metadata is optional
43+
)
44+
45+
# Convert to dict for JSON serialization
46+
data = area.to_dict()
47+
48+
# Load from dict
49+
area = Area.from_dict(data)
50+
```
51+
52+
### Using Save/Load Functions
53+
54+
```python
55+
from pathlib import Path
56+
from src.schemas import (
57+
save_areas_output,
58+
load_areas_output,
59+
PipelineMetadata,
60+
Area,
61+
)
62+
63+
# Save areas
64+
areas = [Area(...), Area(...)]
65+
metadata = PipelineMetadata(
66+
experiment_id="r0_10x10",
67+
output_base_dir="agentic_outputs",
68+
timestamp="2025-11-06T12:00:00Z",
69+
output_stage_tag="_20251009_122040"
70+
)
71+
save_areas_output(areas, metadata, Path("output/areas.json"))
72+
73+
# Load areas
74+
areas, metadata = load_areas_output(Path("output/areas.json"))
75+
```
76+
77+
## Pipeline Stages
78+
79+
0. **Experiment Setup** - `Experiment`, `Domain`
80+
1. **Area Generation** - `Area`
81+
2. **Capability Generation** - `Capability`
82+
3. **Task Generation** - `Task`
83+
4. **Solution Generation** - `TaskSolution`
84+
5. **Validation** - `ValidationResult`
85+
86+
See [`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md) for detailed specifications.

src/schemas/__init__.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
"""Standardized schemas for ACE pipeline stages.
2+
3+
This module provides standardized data structures for all pipeline stages,
4+
ensuring consistent input/output formats regardless of internal implementation.
5+
"""
6+
7+
from src.schemas.area_schemas import Area
8+
from src.schemas.capability_schemas import Capability
9+
from src.schemas.experiment_schemas import Domain, Experiment
10+
from src.schemas.io_utils import (
11+
load_areas_output,
12+
load_capabilities_output,
13+
load_domain_output,
14+
load_experiment_output,
15+
load_solution_output,
16+
load_tasks_output,
17+
load_validation_output,
18+
save_areas_output,
19+
save_capabilities_output,
20+
save_domain_output,
21+
save_experiment_output,
22+
save_solution_output,
23+
save_tasks_output,
24+
save_validation_output,
25+
)
26+
from src.schemas.metadata_schemas import PipelineMetadata
27+
from src.schemas.solution_schemas import TaskSolution
28+
from src.schemas.task_schemas import Task
29+
from src.schemas.validation_schemas import ValidationResult
30+
31+
32+
__all__ = [
33+
# Metadata
34+
"PipelineMetadata",
35+
# Experiment schemas (Stage 0)
36+
"Experiment",
37+
"Domain",
38+
# Area schemas
39+
"Area",
40+
# Capability schemas
41+
"Capability",
42+
# Task schemas
43+
"Task",
44+
# Solution schemas
45+
"TaskSolution",
46+
# Validation schemas
47+
"ValidationResult",
48+
# I/O functions - Save
49+
"save_experiment_output",
50+
"save_domain_output",
51+
"save_areas_output",
52+
"save_capabilities_output",
53+
"save_tasks_output",
54+
"save_solution_output",
55+
"save_validation_output",
56+
# I/O functions - Load
57+
"load_experiment_output",
58+
"load_domain_output",
59+
"load_areas_output",
60+
"load_capabilities_output",
61+
"load_tasks_output",
62+
"load_solution_output",
63+
"load_validation_output",
64+
]

src/schemas/area_schemas.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""Schemas for area generation stage."""
2+
3+
from dataclasses import dataclass, field
4+
from typing import Dict, Optional
5+
6+
7+
@dataclass
class Area:
    """Represents a domain area (output of the area generation stage).

    Attributes:
        name: Name of the area. Required.
        area_id: Identifier of the area. Required.
        description: Optional description; omitted from ``to_dict`` output
            when ``None``.
        domain: Name of the domain this area belongs to (defaults to ``""``).
        domain_id: Identifier of that domain (defaults to ``""``).
        generation_metadata: Optional mapping of extra metadata; omitted from
            ``to_dict`` output when empty or ``None``.
    """

    name: str
    area_id: str
    description: Optional[str] = None
    domain: str = ""
    domain_id: str = ""
    generation_metadata: Optional[Dict] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Convert to a plain dictionary suitable for JSON serialization.

        Returns:
            A new dict that always contains ``name``, ``area_id``, ``domain``
            and ``domain_id``. ``description`` is included only when it is not
            ``None``; ``generation_metadata`` only when it is non-empty.
        """
        result = {
            "name": self.name,
            "area_id": self.area_id,
            "domain": self.domain,
            "domain_id": self.domain_id,
        }
        if self.description is not None:
            result["description"] = self.description
        if self.generation_metadata:
            # Shallow copy: editing the serialized output must not silently
            # change this instance's metadata.
            result["generation_metadata"] = dict(self.generation_metadata)
        return result

    @classmethod
    def from_dict(cls, data: dict) -> "Area":
        """Create an ``Area`` from a dictionary.

        Args:
            data: Mapping with required keys ``name`` and ``area_id``; all
                other keys are optional and fall back to the field defaults.

        Returns:
            A new ``Area`` instance.

        Raises:
            KeyError: If ``name`` or ``area_id`` is missing from ``data``.
        """
        metadata = data.get("generation_metadata", {})
        return cls(
            name=data["name"],
            area_id=data["area_id"],
            description=data.get("description"),
            domain=data.get("domain", ""),
            domain_id=data.get("domain_id", ""),
            # Copy so the instance does not share state with the caller's
            # input; an explicit None is preserved as-is.
            generation_metadata=None if metadata is None else dict(metadata),
        )

src/schemas/capability_schemas.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""Schemas for capability generation stage."""
2+
3+
from dataclasses import dataclass, field
4+
from typing import Dict, Optional
5+
6+
7+
@dataclass
class Capability:
    """Represents a capability within an area.

    Attributes:
        name: Name of the capability. Required.
        capability_id: Identifier of the capability. Required.
        description: Optional description; omitted from ``to_dict`` output
            when ``None``.
        area: Name of the area this capability belongs to (defaults to ``""``).
        area_id: Identifier of that area (defaults to ``""``).
        domain: Name of the domain (defaults to ``""``).
        domain_id: Identifier of the domain (defaults to ``""``).
        generation_metadata: Optional mapping of extra metadata; omitted from
            ``to_dict`` output when empty or ``None``.
    """

    name: str
    capability_id: str
    description: Optional[str] = None
    area: str = ""
    area_id: str = ""
    domain: str = ""
    domain_id: str = ""
    generation_metadata: Optional[Dict] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Convert to a plain dictionary suitable for JSON serialization.

        Returns:
            A new dict that always contains ``name``, ``capability_id``,
            ``area``, ``area_id``, ``domain`` and ``domain_id``.
            ``description`` is included only when it is not ``None``;
            ``generation_metadata`` only when it is non-empty.
        """
        result = {
            "name": self.name,
            "capability_id": self.capability_id,
            "area": self.area,
            "area_id": self.area_id,
            "domain": self.domain,
            "domain_id": self.domain_id,
        }
        if self.description is not None:
            result["description"] = self.description
        if self.generation_metadata:
            # Shallow copy: editing the serialized output must not silently
            # change this instance's metadata.
            result["generation_metadata"] = dict(self.generation_metadata)
        return result

    @classmethod
    def from_dict(cls, data: dict) -> "Capability":
        """Create a ``Capability`` from a dictionary.

        Args:
            data: Mapping with required keys ``name`` and ``capability_id``;
                all other keys are optional and fall back to the field
                defaults.

        Returns:
            A new ``Capability`` instance.

        Raises:
            KeyError: If ``name`` or ``capability_id`` is missing from
                ``data``.
        """
        metadata = data.get("generation_metadata", {})
        return cls(
            name=data["name"],
            capability_id=data["capability_id"],
            description=data.get("description"),
            area=data.get("area", ""),
            area_id=data.get("area_id", ""),
            domain=data.get("domain", ""),
            domain_id=data.get("domain_id", ""),
            # Copy so the instance does not share state with the caller's
            # input; an explicit None is preserved as-is.
            generation_metadata=None if metadata is None else dict(metadata),
        )

src/schemas/experiment_schemas.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Schemas for experiment setup stage (Stage 0)."""
2+
3+
from dataclasses import dataclass
4+
from typing import Any, Dict, Optional
5+
6+
7+
@dataclass
class Experiment:
    """Represents experiment metadata and configuration.

    Attributes:
        experiment_id: Identifier of the experiment. Required.
        domain: Name of the domain. Required.
        domain_id: Identifier of the domain. Required.
        pipeline_type: Optional pipeline type; omitted from ``to_dict``
            output when ``None``.
        configuration: Configuration mapping. May be passed as ``None``, in
            which case it is replaced by an empty dict after initialization.
    """

    experiment_id: str
    domain: str
    domain_id: str
    pipeline_type: Optional[str] = None
    # Annotated Optional because the default is None; __post_init__
    # guarantees a dict once the instance has been constructed.
    configuration: Optional[Dict[str, Any]] = None

    def __post_init__(self) -> None:
        """Initialize configuration to an empty dict if not provided."""
        if self.configuration is None:
            self.configuration = {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary suitable for JSON serialization.

        Returns:
            A new dict that always contains ``experiment_id``, ``domain``,
            ``domain_id`` and ``configuration``. ``pipeline_type`` is
            included only when it is not ``None``.
        """
        result = {
            "experiment_id": self.experiment_id,
            "domain": self.domain,
            "domain_id": self.domain_id,
            # Shallow copy: editing the serialized output must not silently
            # change this instance's configuration.
            "configuration": dict(self.configuration or {}),
        }
        if self.pipeline_type is not None:
            result["pipeline_type"] = self.pipeline_type
        return result

    @classmethod
    def from_dict(cls, data: dict) -> "Experiment":
        """Create an ``Experiment`` from a dictionary.

        Args:
            data: Mapping with required keys ``experiment_id``, ``domain``
                and ``domain_id``; ``pipeline_type`` and ``configuration``
                are optional.

        Returns:
            A new ``Experiment`` instance.

        Raises:
            KeyError: If a required key is missing from ``data``.
        """
        config = data.get("configuration")
        return cls(
            experiment_id=data["experiment_id"],
            domain=data["domain"],
            domain_id=data["domain_id"],
            pipeline_type=data.get("pipeline_type"),
            # Copy so the instance does not share state with the caller's
            # input; a missing/None value is normalized by __post_init__.
            configuration=None if config is None else dict(config),
        )
44+
45+
46+
@dataclass
class Domain:
    """A domain, identified by a name and a domain id.

    ``description`` is optional and is left out of the serialized form when
    it is ``None``.
    """

    name: str
    domain_id: str
    description: Optional[str] = None

    def to_dict(self):
        """Serialize this domain into a plain dictionary."""
        payload = {"name": self.name, "domain_id": self.domain_id}
        # Guard clause: no description means the two required keys suffice.
        if self.description is None:
            return payload
        payload["description"] = self.description
        return payload

    @classmethod
    def from_dict(cls, data: dict):
        """Build a domain from a dictionary.

        ``name`` and ``domain_id`` are required keys (a missing one raises
        ``KeyError``); ``description`` defaults to ``None`` when absent.
        """
        return cls(data["name"], data["domain_id"], data.get("description"))

0 commit comments

Comments
 (0)