99"""
1010
1111import json
12+ from typing import Any
1213from unittest .mock import AsyncMock , Mock , patch
1314
1415import pytest
@@ -282,6 +283,7 @@ def test_update_coded_eval_run_spec(self, progress_reporter):
282283 eval_run_id = "test-run-id" ,
283284 actual_output = {"result" : "success" },
284285 execution_time = 5.5 ,
286+ success = True ,
285287 is_coded = True ,
286288 )
287289
@@ -291,6 +293,7 @@ def test_update_coded_eval_run_spec(self, progress_reporter):
291293 assert spec .json ["evaluatorRuns" ] == evaluator_runs
292294 assert spec .json ["result" ]["scores" ] == evaluator_scores
293295 assert spec .json ["completionMetrics" ]["duration" ] == 5
296+ assert spec .json ["status" ] == 2 # COMPLETED
294297
295298 def test_update_legacy_eval_run_spec (self , progress_reporter ):
296299 """Test updating eval run spec for legacy evaluators."""
@@ -305,6 +308,7 @@ def test_update_legacy_eval_run_spec(self, progress_reporter):
305308 eval_run_id = "test-run-id" ,
306309 actual_output = {"result" : "success" },
307310 execution_time = 5.5 ,
311+ success = True ,
308312 is_coded = False ,
309313 )
310314
@@ -314,6 +318,47 @@ def test_update_legacy_eval_run_spec(self, progress_reporter):
314318 assert spec .json ["assertionRuns" ] == assertion_runs
315319 assert spec .json ["result" ]["evaluatorScores" ] == evaluator_scores
316320 assert spec .json ["completionMetrics" ]["duration" ] == 5
321+ assert spec .json ["status" ] == "Completed" # String format for legacy
322+
def test_update_coded_eval_run_spec_with_failure(self, progress_reporter):
    """Check the coded eval-run update spec produced for a failed run.

    Calls ``_update_coded_eval_run_spec`` with empty evaluator runs and
    scores, an empty output, zero execution time and ``success=False``,
    then verifies the request is a PUT against a ``coded/`` endpoint whose
    payload carries the run id and the numeric status 3 (FAILED).
    """
    request_spec = progress_reporter._update_coded_eval_run_spec(
        evaluator_runs=[],
        evaluator_scores=[],
        eval_run_id="test-run-id",
        actual_output={},
        execution_time=0.0,
        success=False,
        is_coded=True,
    )

    assert request_spec.method == "PUT"
    assert "coded/" in request_spec.endpoint

    payload = request_spec.json
    assert payload["evalRunId"] == "test-run-id"
    assert payload["status"] == 3  # FAILED
342+
def test_update_legacy_eval_run_spec_with_failure(self, progress_reporter):
    """Check the legacy eval-run update spec produced for a failed run.

    Calls ``_update_eval_run_spec`` with empty assertion runs and scores,
    an empty output, zero execution time and ``success=False``, then
    verifies the request is a PUT against a non-``coded/`` endpoint whose
    payload carries the run id and the string status ``"Failed"``.
    """
    request_spec = progress_reporter._update_eval_run_spec(
        assertion_runs=[],
        evaluator_scores=[],
        eval_run_id="test-run-id",
        actual_output={},
        execution_time=0.0,
        success=False,
        is_coded=False,
    )

    assert request_spec.method == "PUT"
    assert "coded/" not in request_spec.endpoint

    payload = request_spec.json
    assert payload["evalRunId"] == "test-run-id"
    assert payload["status"] == "Failed"  # String format for legacy
317362
318363
319364# Tests for custom eval set run ID handling
@@ -421,3 +466,72 @@ def test_eval_set_run_created_event_initialization_without_custom_run_id(self):
421466
422467 # Assert
423468 assert event .eval_set_run_id is None
469+
470+
471+ # Tests for eval set run status updates
class TestEvalSetRunStatusUpdates:
    """Verify the status written into eval set run update specs.

    Each test builds a spec through ``_update_eval_set_run_spec`` for one
    combination of ``is_coded`` and ``success`` and checks the HTTP method,
    the endpoint flavour (``coded/`` or not), the run id and the status
    value in the JSON payload.
    """

    @staticmethod
    def _build_spec(progress_reporter, *, is_coded, success):
        """Return the update spec for the shared run id and evaluator scores."""
        return progress_reporter._update_eval_set_run_spec(
            eval_set_run_id="test-run-id",
            evaluator_scores={"eval-1": 0.9, "eval-2": 0.85},
            is_coded=is_coded,
            success=success,
        )

    def test_update_eval_set_run_spec_with_success_coded(self, progress_reporter):
        """Coded evaluators with success=True report numeric status 2."""
        request_spec = self._build_spec(
            progress_reporter, is_coded=True, success=True
        )

        assert request_spec.method == "PUT"
        assert "coded/" in request_spec.endpoint

        payload = request_spec.json
        assert payload["evalSetRunId"] == "test-run-id"
        assert payload["status"] == 2  # COMPLETED = 2

    def test_update_eval_set_run_spec_with_failure_coded(self, progress_reporter):
        """Coded evaluators with success=False report numeric status 3."""
        request_spec = self._build_spec(
            progress_reporter, is_coded=True, success=False
        )

        assert request_spec.method == "PUT"
        assert "coded/" in request_spec.endpoint

        payload = request_spec.json
        assert payload["evalSetRunId"] == "test-run-id"
        assert payload["status"] == 3  # FAILED = 3

    def test_update_eval_set_run_spec_with_success_legacy(self, progress_reporter):
        """Legacy evaluators with success=True report the string "Completed"."""
        request_spec = self._build_spec(
            progress_reporter, is_coded=False, success=True
        )

        assert request_spec.method == "PUT"
        assert "coded/" not in request_spec.endpoint

        payload = request_spec.json
        assert payload["evalSetRunId"] == "test-run-id"
        assert payload["status"] == "Completed"  # String format for legacy

    def test_update_eval_set_run_spec_with_failure_legacy(self, progress_reporter):
        """Legacy evaluators with success=False report the string "Failed"."""
        request_spec = self._build_spec(
            progress_reporter, is_coded=False, success=False
        )

        assert request_spec.method == "PUT"
        assert "coded/" not in request_spec.endpoint

        payload = request_spec.json
        assert payload["evalSetRunId"] == "test-run-id"
        assert payload["status"] == "Failed"  # String format for legacy
0 commit comments