Skip to content

Commit 76f6500

Browse files
committed
[CI] Validate scraped push commits via GitHub API
1 parent 131e3d0 commit 76f6500

File tree

3 files changed

+305
-0
lines changed

3 files changed

+305
-0
lines changed

llvm-ops-metrics/ops-container/process_llvm_commits.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import logging
44
import os
55
import git
6+
from github import Auth
7+
from github import Github
68
from google.cloud import bigquery
79
import requests
810

@@ -224,6 +226,56 @@ def query_for_reviews(
224226
return list(new_commits.values())
225227

226228

229+
def validate_push_commits(
    new_commits: list[LLVMCommitInfo], github_auth: Auth.Token
) -> None:
    """Re-check commits recorded as direct pushes against the GitHub API.

    Data from GitHub Archive BigQuery is lossy, so a commit recorded without
    a pull request may in fact have one. Every commit whose
    `has_pull_request` flag is unset is looked up in llvm/llvm-project; when
    GitHub reports at least one associated pull request, the commit record is
    amended in place using the first pull request returned.

    Args:
      new_commits: Commits to validate. Entries found to have a pull request
        get `has_pull_request`, `pull_request_number`, `is_reviewed` and
        `is_approved` overwritten.
      github_auth: The authentication token to access the GitHub API with.

    Returns:
      None
    """
    client = Github(auth=github_auth)
    repo = client.get_repo("llvm/llvm-project")

    # Number of commits that were wrongly recorded as push commits.
    amended_total = 0
    for commit_info in new_commits:
        # Commits already known to come from a pull request need no check.
        if commit_info.has_pull_request:
            continue

        # Ask GitHub which pull requests, if any, contain this commit.
        associated_pulls = repo.get_commit(commit_info.commit_sha).get_pulls()
        if associated_pulls.totalCount == 0:
            continue

        # Only the first associated pull request is used for the record.
        first_pull = associated_pulls[0]
        commit_info.has_pull_request = True
        commit_info.pull_request_number = first_pull.number

        # Reviewed means any review exists; approved means at least one
        # review is in the "approved" state (compared case-insensitively).
        pull_reviews = first_pull.get_reviews()
        commit_info.is_reviewed = pull_reviews.totalCount > 0
        approval_seen = False
        for pull_review in pull_reviews:
            if pull_review.state.lower() == "approved":
                approval_seen = True
                break
        commit_info.is_approved = approval_seen

        amended_total += 1
        logging.info(
            "\tReview data for commit %s amended", commit_info.commit_sha
        )

    logging.info(
        "Found %d commits that were incorrectly marked as push commits",
        amended_total,
    )
277+
278+
227279
def upload_daily_metrics(
228280
grafana_api_key: str,
229281
grafana_metrics_userid: str,
@@ -271,6 +323,7 @@ def upload_daily_metrics(
271323

272324

273325
def main() -> None:
326+
github_auth = Auth.Token(os.environ["GITHUB_TOKEN"])
274327
grafana_api_key = os.environ["GRAFANA_API_KEY"]
275328
grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"]
276329

@@ -295,6 +348,9 @@ def main() -> None:
295348
logging.info("Querying for reviews of new commits.")
296349
new_commit_info = query_for_reviews(new_commits, date_to_scrape)
297350

351+
logging.info("Validating new commits.")
352+
validate_push_commits(new_commit_info, github_auth)
353+
298354
logging.info("Uploading metrics to Grafana.")
299355
upload_daily_metrics(grafana_api_key, grafana_metrics_userid, new_commit_info)
300356

0 commit comments

Comments
 (0)