Skip to content

Commit 3ae62e0

Browse files
[CI] Explain Flaky Failures
This patch adds functionality to the premerge advisor to explain away flaky failures. For this, we just look to see that the failure observed is failing for a sufficient amount of time, in our case 200 commits. Reviewers: dschuff, gburgessiv, cmtice, lnihlen, Keenuts, dwblaikie Reviewed By: dwblaikie Pull Request: #641
1 parent 3276552 commit 3ae62e0

File tree

2 files changed

+163
-3
lines changed

2 files changed

+163
-3
lines changed

premerge/advisor/advisor_lib.py

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class TestExplanationRequest(TypedDict):
3636
}
3737

3838
EXPLAINED_HEAD_MAX_COMMIT_INDEX_DIFFERENCE = 5
39+
EXPLAINED_FLAKY_MIN_COMMIT_RANGE = 200
3940

4041

4142
def _create_table(table_name: str, connection: sqlite3.Connection):
@@ -131,20 +132,76 @@ def _try_explain_failing_at_head(
131132
return None
132133

133134

135+
def _try_explain_flaky_failure(
136+
db_connection: sqlite3.Connection,
137+
test_failure: TestFailure,
138+
platform: str,
139+
) -> FailureExplanation | None:
140+
"""See if a failure is flaky at head.
141+
142+
This function looks at a test failure and tries to see if the failure is
143+
a known flake at head. It does this heuristically, by seeing if there have
144+
been at least two failures across more than 200 commits. This has the
145+
advantage of being a simple heuristic and performant. We do not
146+
explicitly handle the case where a test has been failing continiously
147+
for this amount of time as this is an OOM more range than any non-flaky
148+
tests have stayed in tree.
149+
150+
Args:
151+
db_connection: The database connection.
152+
test_failure: The test failure to try and explain.
153+
platform: The platform the test failed on.
154+
155+
Returns:
156+
Either None, if the test could not be explained as flaky, or a
157+
FailureExplanation object explaining the test failure.
158+
"""
159+
test_name_matches = db_connection.execute(
160+
"SELECT failure_message, commit_index FROM failures WHERE source_type='postcommit' AND platform=?",
161+
(platform,),
162+
).fetchall()
163+
commit_indices = []
164+
for failure_message, commit_index in test_name_matches:
165+
if failure_message == test_failure["message"]:
166+
commit_indices.append(commit_index)
167+
if len(commit_indices) == 0:
168+
return None
169+
commit_range = max(commit_indices) - min(commit_indices)
170+
if commit_range > EXPLAINED_FLAKY_MIN_COMMIT_RANGE:
171+
return {
172+
"name": test_failure["name"],
173+
"explained": True,
174+
"reason": "This test is flaky in main.",
175+
}
176+
return None
177+
178+
134179
def explain_failures(
135180
explanation_request: TestExplanationRequest,
136181
repository_path: str,
137182
db_connection: sqlite3.Connection,
138183
) -> list[FailureExplanation]:
139184
explanations = []
140185
for test_failure in explanation_request["failures"]:
186+
commit_index = git_utils.get_commit_index(
187+
explanation_request["base_commit_sha"], repository_path, db_connection
188+
)
189+
# We want to try and explain flaky failures first. Otherwise we might
190+
# explain a flaky failure as a failure at head if there is a recent
191+
# failure in the last couple of commits.
192+
explained_as_flaky = _try_explain_flaky_failure(
193+
db_connection,
194+
test_failure,
195+
explanation_request["platform"],
196+
)
197+
if explained_as_flaky:
198+
explanations.append(explained_as_flaky)
199+
continue
141200
explained_at_head = _try_explain_failing_at_head(
142201
db_connection,
143202
test_failure,
144203
explanation_request["base_commit_sha"],
145-
git_utils.get_commit_index(
146-
explanation_request["base_commit_sha"], repository_path, db_connection
147-
),
204+
commit_index,
148205
explanation_request["platform"],
149206
)
150207
if explained_at_head:

premerge/advisor/advisor_lib_test.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ def setUp(self):
8686
[
8787
("8d29a3bb6f3d92d65bf5811b53bf42bf63685359", 1),
8888
("6b7064686b706f7064656d6f6e68756e74657273", 2),
89+
("6a6f73687561747265656a6f7368756174726565", 201),
90+
("6269677375726269677375726269677375726269", 202),
91+
("6d746c616e676c65796d746c616e676c65796d74", 203),
8992
],
9093
)
9194
self.repository_path_dir = tempfile.TemporaryDirectory()
@@ -280,3 +283,103 @@ def test_no_explain_different_message(self):
280283
}
281284
],
282285
)
286+
287+
def _setup_flaky_test_info(
288+
self,
289+
source_type="postcommit",
290+
message="failed in way 1",
291+
second_failure_sha="6269677375726269677375726269677375726269",
292+
):
293+
failures_info = [
294+
{
295+
"source_type": source_type,
296+
"base_commit_sha": "8d29a3bb6f3d92d65bf5811b53bf42bf63685359",
297+
"source_id": "10000",
298+
"failures": [
299+
{"name": "a.ll", "message": message},
300+
],
301+
"platform": "linux-x86_64",
302+
},
303+
{
304+
"source_type": source_type,
305+
"base_commit_sha": second_failure_sha,
306+
"source_id": "100001",
307+
"failures": [
308+
{"name": "a.ll", "message": message},
309+
],
310+
"platform": "linux-x86_64",
311+
},
312+
]
313+
for failure_info in failures_info:
314+
advisor_lib.upload_failures(
315+
failure_info, self.db_connection, self.repository_path
316+
)
317+
318+
def _get_flaky_test_explanations(self):
319+
explanation_request = {
320+
"failures": [{"name": "a.ll", "message": "failed in way 1"}],
321+
"base_commit_sha": "6d746c616e676c65796d746c616e676c65796d74",
322+
"platform": "linux-x86_64",
323+
}
324+
return advisor_lib.explain_failures(
325+
explanation_request, self.repository_path, self.db_connection
326+
)
327+
328+
def test_explain_flaky(self):
329+
self._setup_flaky_test_info()
330+
self.assertListEqual(
331+
self._get_flaky_test_explanations(),
332+
[
333+
{
334+
"name": "a.ll",
335+
"explained": True,
336+
"reason": "This test is flaky in main.",
337+
}
338+
],
339+
)
340+
341+
def test_no_explain_flaky_different_message(self):
342+
self._setup_flaky_test_info(message="failed in way 2")
343+
self.assertListEqual(
344+
self._get_flaky_test_explanations(),
345+
[
346+
{
347+
"name": "a.ll",
348+
"explained": False,
349+
"reason": None,
350+
}
351+
],
352+
)
353+
354+
# Test that we do not explain away flaky failures from pull request data.
355+
# PRs might have the same failures multiple times across a large span of
356+
# base commits, which might accidentally trigger the heuristic.
357+
def test_no_explain_flaky_pullrequest_data(self):
358+
self._setup_flaky_test_info(source_type="pull_request")
359+
self.assertListEqual(
360+
self._get_flaky_test_explanations(),
361+
[
362+
{
363+
"name": "a.ll",
364+
"explained": False,
365+
"reason": None,
366+
}
367+
],
368+
)
369+
370+
# Test that if all of the flaky failures are within a small range, we do
371+
# not report this as a flaky failure.
372+
def test_no_explain_flaky_small_range(self):
373+
self._setup_flaky_test_info(
374+
second_failure_sha="6b7064686b706f7064656d6f6e68756e74657273"
375+
)
376+
self.assertListEqual(
377+
self._get_flaky_test_explanations(),
378+
[
379+
{
380+
"name": "a.ll",
381+
"explained": False,
382+
"reason": None,
383+
}
384+
],
385+
)

0 commit comments

Comments
 (0)