[CI] Explain Flaky Failures

boomanaiden154 · web-flow · commit 3ae62e09da2b · 2025-11-14T17:43:49.000Z
This patch adds functionality to the premerge advisor to explain away flaky failures. For this, we just look to see that the failure observed is failing for a sufficient amount of time, in our case 200 commits. Reviewers: dschuff, gburgessiv, cmtice, lnihlen, Keenuts, dwblaikie Reviewed By: dwblaikie Pull Request: #641
diff --git a/premerge/advisor/advisor_lib.py b/premerge/advisor/advisor_lib.py
@@ -36,6 +36,7 @@ class TestExplanationRequest(TypedDict):
 }
 
 EXPLAINED_HEAD_MAX_COMMIT_INDEX_DIFFERENCE = 5
+EXPLAINED_FLAKY_MIN_COMMIT_RANGE = 200
 
 
 def _create_table(table_name: str, connection: sqlite3.Connection):
@@ -131,20 +132,76 @@ def _try_explain_failing_at_head(
     return None
 
 
+def _try_explain_flaky_failure(
+    db_connection: sqlite3.Connection,
+    test_failure: TestFailure,
+    platform: str,
+) -> FailureExplanation | None:
+    """See if a failure is flaky at head.
+
+    This function looks at a test failure and tries to see if the failure is
+    a known flake at head. It does this heuristically, by seeing if there have
+    been at least two failures across more than 200 commits. This has the
+    advantage of being a simple heuristic and performant. We do not
+    explicitly handle the case where a test has been failing continiously
+    for this amount of time as this is an OOM more range than any non-flaky
+    tests have stayed in tree.
+
+    Args:
+      db_connection: The database connection.
+      test_failure: The test failure to try and explain.
+      platform: The platform the test failed on.
+    
+    Returns:
+      Either None, if the test could not be explained as flaky, or a
+      FailureExplanation object explaining the test failure.
+    """
+    test_name_matches = db_connection.execute(
+        "SELECT failure_message, commit_index FROM failures WHERE source_type='postcommit' AND platform=?",
+        (platform,),
+    ).fetchall()
+    commit_indices = []
+    for failure_message, commit_index in test_name_matches:
+        if failure_message == test_failure["message"]:
+            commit_indices.append(commit_index)
+    if len(commit_indices) == 0:
+        return None
+    commit_range = max(commit_indices) - min(commit_indices)
+    if commit_range > EXPLAINED_FLAKY_MIN_COMMIT_RANGE:
+        return {
+            "name": test_failure["name"],
+            "explained": True,
+            "reason": "This test is flaky in main.",
+        }
+    return None
+
+
 def explain_failures(
     explanation_request: TestExplanationRequest,
     repository_path: str,
     db_connection: sqlite3.Connection,
 ) -> list[FailureExplanation]:
     explanations = []
     for test_failure in explanation_request["failures"]:
+        commit_index = git_utils.get_commit_index(
+            explanation_request["base_commit_sha"], repository_path, db_connection
+        )
+        # We want to try and explain flaky failures first. Otherwise we might
+        # explain a flaky failure as a failure at head if there is a recent
+        # failure in the last couple of commits.
+        explained_as_flaky = _try_explain_flaky_failure(
+            db_connection,
+            test_failure,
+            explanation_request["platform"],
+        )
+        if explained_as_flaky:
+            explanations.append(explained_as_flaky)
+            continue
         explained_at_head = _try_explain_failing_at_head(
             db_connection,
             test_failure,
             explanation_request["base_commit_sha"],
-            git_utils.get_commit_index(
-                explanation_request["base_commit_sha"], repository_path, db_connection
-            ),
+            commit_index,
             explanation_request["platform"],
         )
         if explained_at_head:
diff --git a/premerge/advisor/advisor_lib_test.py b/premerge/advisor/advisor_lib_test.py
@@ -86,6 +86,9 @@ def setUp(self):
             [
                 ("8d29a3bb6f3d92d65bf5811b53bf42bf63685359", 1),
                 ("6b7064686b706f7064656d6f6e68756e74657273", 2),
+                ("6a6f73687561747265656a6f7368756174726565", 201),
+                ("6269677375726269677375726269677375726269", 202),
+                ("6d746c616e676c65796d746c616e676c65796d74", 203),
             ],
         )
         self.repository_path_dir = tempfile.TemporaryDirectory()
@@ -280,3 +283,103 @@ def test_no_explain_different_message(self):
                 }
             ],
         )
+
+    def _setup_flaky_test_info(
+        self,
+        source_type="postcommit",
+        message="failed in way 1",
+        second_failure_sha="6269677375726269677375726269677375726269",
+    ):
+        failures_info = [
+            {
+                "source_type": source_type,
+                "base_commit_sha": "8d29a3bb6f3d92d65bf5811b53bf42bf63685359",
+                "source_id": "10000",
+                "failures": [
+                    {"name": "a.ll", "message": message},
+                ],
+                "platform": "linux-x86_64",
+            },
+            {
+                "source_type": source_type,
+                "base_commit_sha": second_failure_sha,
+                "source_id": "100001",
+                "failures": [
+                    {"name": "a.ll", "message": message},
+                ],
+                "platform": "linux-x86_64",
+            },
+        ]
+        for failure_info in failures_info:
+            advisor_lib.upload_failures(
+                failure_info, self.db_connection, self.repository_path
+            )
+
+    def _get_flaky_test_explanations(self):
+        explanation_request = {
+            "failures": [{"name": "a.ll", "message": "failed in way 1"}],
+            "base_commit_sha": "6d746c616e676c65796d746c616e676c65796d74",
+            "platform": "linux-x86_64",
+        }
+        return advisor_lib.explain_failures(
+            explanation_request, self.repository_path, self.db_connection
+        )
+
+    def test_explain_flaky(self):
+        self._setup_flaky_test_info()
+        self.assertListEqual(
+            self._get_flaky_test_explanations(),
+            [
+                {
+                    "name": "a.ll",
+                    "explained": True,
+                    "reason": "This test is flaky in main.",
+                }
+            ],
+        )
+
+    def test_no_explain_flaky_different_message(self):
+        self._setup_flaky_test_info(message="failed in way 2")
+        self.assertListEqual(
+            self._get_flaky_test_explanations(),
+            [
+                {
+                    "name": "a.ll",
+                    "explained": False,
+                    "reason": None,
+                }
+            ],
+        )
+
+    # Test that we do not explain away flaky failures from pull request data.
+    # PRs might have the same failures multiple times across a large span of
+    # base commits, which might accidentally trigger the heuristic.
+    def test_no_explain_flaky_pullrequest_data(self):
+        self._setup_flaky_test_info(source_type="pull_request")
+        self.assertListEqual(
+            self._get_flaky_test_explanations(),
+            [
+                {
+                    "name": "a.ll",
+                    "explained": False,
+                    "reason": None,
+                }
+            ],
+        )
+
+    # Test that if all of the flaky failures are within a small range, we do
+    # not report this as a flaky failure.
+    def test_no_explain_flaky_small_range(self):
+        self._setup_flaky_test_info(
+            second_failure_sha="6b7064686b706f7064656d6f6e68756e74657273"
+        )
+        self.assertListEqual(
+            self._get_flaky_test_explanations(),
+            [
+                {
+                    "name": "a.ll",
+                    "explained": False,
+                    "reason": None,
+                }
+            ],
+        )