Skip to content

Commit 67debe4

Browse files
committed
parallel exec example with docker
1 parent d8e5d83 commit 67debe4

File tree

1 file changed

+84
-0
lines changed

1 file changed

+84
-0
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import asyncio
2+
import time
3+
from pathlib import Path
4+
5+
from codeboxapi import CodeBox
6+
7+
8+
async def train_model(codebox: CodeBox, data_split: int) -> dict:
    """Train a linear-regression model inside ``codebox`` for one data split.

    Uploads the advertising dataset into the sandbox, installs pandas and
    scikit-learn there, then executes a training script whose train/test
    split is seeded with ``data_split``.

    Args:
        codebox: Sandbox used for the upload, the installs and the execution.
        data_split: Passed as ``random_state`` to ``train_test_split`` in the
            generated script and echoed back in the returned dict.

    Returns:
        A dict with the keys ``"split"`` (``data_split``), ``"output"`` (the
        ``text`` of the execution result) and ``"errors"`` (its ``errors``).

    Raises:
        FileNotFoundError: If the dataset is not a regular file at
            ``examples/assets/advertising.csv`` (resolved against the
            current working directory).
    """
    file = Path("examples/assets/advertising.csv")
    # Raise explicitly instead of using ``assert``: assertions are removed
    # under ``python -O``, and ``is_file()`` also rejects a directory that
    # happens to sit at this path.
    if not file.is_file():
        raise FileNotFoundError(f"Dataset file does not exist: {file}")

    # Upload dataset
    await codebox.aupload(file.name, file.read_bytes())

    # Install required packages
    await codebox.ainstall("pandas")
    await codebox.ainstall("scikit-learn")

    # Training code with different data splits.
    # This is an f-string: ``{data_split}`` is substituted here, while the
    # doubled braces (``{{mse:.4f}}``) survive as single braces so that they
    # are formatted by the script when it runs inside the sandbox.
    code = f"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load and prepare data
data = pd.read_csv('advertising.csv')
X = data[['TV', 'Radio', 'Newspaper']]
y = data['Sales']

# Split with different random states for different data subsets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state={data_split}
)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Split {data_split}:")
print(f"MSE: {{mse:.4f}}")
print(f"R2: {{r2:.4f}}")
print(f"Coefficients: {{model.coef_.tolist()}}")
"""
    result = await codebox.aexec(code)
    return {"split": data_split, "output": result.text, "errors": result.errors}
54+
55+
56+
async def main():
    """Run ``train_model`` concurrently on several CodeBox instances.

    Creates ``num_parallel`` CodeBox objects with ``api_key="docker"``,
    schedules one ``train_model`` task per box (using the box index as the
    data split), waits for all of them, and prints the elapsed wall-clock
    time followed by each task's output or errors.
    """
    num_parallel = 4

    # One sandbox per job so the runs do not share an execution environment.
    codeboxes = [CodeBox(api_key="docker") for _ in range(num_parallel)]

    # Schedule every training job up front; the box index doubles as the
    # data split passed to train_model.
    tasks = [
        asyncio.create_task(train_model(box, index))
        for index, box in enumerate(codeboxes)
    ]

    # Time only the wait for the already-scheduled tasks.
    start_time = time.perf_counter()
    results = await asyncio.gather(*tasks)
    end_time = time.perf_counter()

    # Report the timing, then one section per split.
    print(f"\nParallel execution completed in {end_time - start_time:.2f} seconds\n")
    for outcome in results:
        if outcome["errors"]:
            print(f"Error in split {outcome['split']}:", outcome["errors"])
            continue
        print(f"Results for {outcome['split']}:")
        print(outcome["output"])
        print("-" * 50)
81+
82+
83+
if __name__ == "__main__":
    # Only start the demo when executed as a script, not when imported.
    asyncio.run(main())

0 commit comments

Comments
 (0)