-
Notifications
You must be signed in to change notification settings - Fork 2.4k
fix(dspy): Example.toDict() fails to serialize dspy.History objects #9047
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
597a7b4
e79dc12
c67ba33
ebb502c
2a82f8d
1d56c29
9ed984c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -193,6 +193,9 @@ def toDict(self): # noqa: N802 | |
| def convert_to_serializable(value): | ||
| if hasattr(value, "toDict"): | ||
| return value.toDict() | ||
| elif hasattr(value, "model_dump"): | ||
|
||
| # Handle Pydantic models (e.g., dspy.History) | ||
| return value.model_dump() | ||
| elif isinstance(value, list): | ||
| return [convert_to_serializable(item) for item in value] | ||
| elif isinstance(value, dict): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| """Test Evaluate with dspy.History objects.""" | ||
| import json | ||
| import tempfile | ||
|
|
||
| import dspy | ||
| from dspy.evaluate import Evaluate | ||
| from dspy.evaluate.metrics import answer_exact_match | ||
| from dspy.predict import Predict | ||
| from dspy.utils.dummies import DummyLM | ||
|
|
||
|
|
||
def test_evaluate_save_as_json_with_history():
    """Test that save_as_json works with Examples containing dspy.History objects.

    Runs ``Evaluate`` over a two-example devset in which every example carries
    a ``dspy.History`` field, then reloads the JSON file written through
    ``save_as_json`` and checks that each history appears as a plain dict
    whose ``messages`` list matches the messages it was built from.
    """
    # Local import: only needed here to build the output path.
    import os

    # Setup: a dummy LM that answers both questions correctly.
    dspy.settings.configure(
        lm=DummyLM(
            {
                "What is 1+1?": {"answer": "2"},
                "What is 2+2?": {"answer": "4"},
            }
        )
    )

    # (question, answer, history messages) for each example, in devset order.
    cases = [
        (
            "What is 1+1?",
            "2",
            [{"question": "Previous Q1", "answer": "Previous A1"}],
        ),
        (
            "What is 2+2?",
            "4",
            [
                {"question": "Previous Q2", "answer": "Previous A2"},
                {"question": "Previous Q3", "answer": "Previous A3"},
            ],
        ),
    ]

    # Create examples with history. The message dicts are copied so the
    # expected values in ``cases`` stay independent of the History objects.
    devset = [
        dspy.Example(
            question=question,
            answer=answer,
            history=dspy.History(messages=[dict(message) for message in messages]),
        ).with_inputs("question")
        for question, answer, messages in cases
    ]

    program = Predict("question -> answer")

    # The temporary directory (and the JSON file inside it) is removed
    # automatically when the ``with`` block exits, even if an assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_json = os.path.join(tmp_dir, "results.json")

        evaluator = Evaluate(
            devset=devset,
            metric=answer_exact_match,
            display_progress=False,
            save_as_json=temp_json,
        )

        result = evaluator(program)
        assert result.score == 100.0

        # Verify JSON file was created and is valid
        with open(temp_json) as f:
            data = json.load(f)

    assert len(data) == len(cases)

    # Verify history was properly serialized in every record
    for record, (_, _, expected_messages) in zip(data, cases):
        assert "history" in record
        assert isinstance(record["history"], dict)
        assert "messages" in record["history"]
        assert record["history"]["messages"] == expected_messages
|
|
||
|
|
||
def test_evaluate_save_as_csv_with_history():
    """Test that save_as_csv works with Examples containing dspy.History objects.

    Runs ``Evaluate`` over a single example that carries a ``dspy.History``
    field, then reads back the CSV file written through ``save_as_csv`` and
    checks that the row has a ``history`` column mentioning ``messages``.
    """
    # Local imports: only needed by this test.
    import csv
    import os

    # Setup: a dummy LM that answers the question correctly.
    dspy.settings.configure(
        lm=DummyLM(
            {
                "What is 1+1?": {"answer": "2"},
            }
        )
    )

    # Create history object
    history = dspy.History(
        messages=[
            {"question": "Previous Q", "answer": "Previous A"},
        ]
    )

    # Create example with history
    devset = [
        dspy.Example(question="What is 1+1?", answer="2", history=history).with_inputs("question"),
    ]

    program = Predict("question -> answer")

    # The temporary directory (and the CSV file inside it) is removed
    # automatically when the ``with`` block exits, even if an assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_csv = os.path.join(tmp_dir, "results.csv")

        evaluator = Evaluate(
            devset=devset,
            metric=answer_exact_match,
            display_progress=False,
            save_as_csv=temp_csv,
        )

        result = evaluator(program)
        assert result.score == 100.0

        # Verify CSV file was created. ``newline=""`` lets the csv module do
        # its own newline handling, as the csv documentation recommends.
        with open(temp_csv, newline="") as f:
            rows = list(csv.DictReader(f))

    assert len(rows) == 1
    assert "history" in rows[0]
    # CSV will have string representation of the dict
    assert "messages" in rows[0]["history"]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot do we still need this change after adding changes to Example?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we need both changes. While `Example.toDict()` now serializes History correctly, `dict(example)` and `example.items()` still return raw History objects. The original code used `merge_dicts(example, prediction)`, which calls `.items()` internally, so it would still fail JSON serialization without this change.

Without the evaluate.py change:
`merge_dicts(example, prediction)` → fails with "Object of type History is not JSON serializable"

With both changes:
`merge_dicts(example.toDict(), prediction)` → succeeds because History is already serialized to a dict

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot I see, can we change `merge_dicts` to call `toDict()` if needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in ebb502c. Moved the toDict() call into merge_dicts(), which now automatically serializes objects with toDict() method. This keeps the serialization logic centralized and makes _prepare_results_output() cleaner - it now passes Example objects directly to merge_dicts().