3
3
import logging
4
4
import os
5
5
import git
6
+ from github import Auth
7
+ from github import Github
6
8
from google .cloud import bigquery
7
9
import requests
8
10
@@ -224,6 +226,56 @@ def query_for_reviews(
224
226
return list (new_commits .values ())
225
227
226
228
229
def validate_push_commits(
    new_commits: list[LLVMCommitInfo], github_auth: Auth.Token
) -> None:
    """Validate that push commits don't have a pull request.

    To address lossiness of data from GitHub Archive BigQuery, we check each
    commit to see if it actually has an associated pull request. Commits that
    turn out to have one are amended in place: the pull request flag and
    number are set, and the review/approval status is filled in from the
    reviews of that pull request.

    Args:
      new_commits: List of commits to validate. Entries already marked as
        having a pull request are left untouched.
      github_auth: The authentication token to access the GitHub API with.

    Returns:
      None
    """
    github_object = Github(auth=github_auth)
    github_repo = github_object.get_repo("llvm/llvm-project")

    # Validate that each push commit does not have a pull request
    # Keep track of how many commits were inaccurately marked as push commits
    inaccuracy_count = 0
    for commit in new_commits:
        # Skip pull requested commits
        if commit.has_pull_request:
            continue

        # Check if this commit actually has a pull request. Pulling the first
        # element straight from the paginated result, instead of asking for
        # totalCount and then indexing, avoids a separate count request for
        # every commit.
        pull_requests = github_repo.get_commit(commit.commit_sha).get_pulls()
        pull_request = next(iter(pull_requests), None)
        if pull_request is None:
            continue

        commit.has_pull_request = True
        commit.pull_request_number = pull_request.number

        # Check if the pull request was reviewed and/or approved. The reviews
        # are materialized once so the same data answers both questions
        # without an extra count request.
        reviews = list(pull_request.get_reviews())
        commit.is_reviewed = bool(reviews)
        commit.is_approved = any(
            review.state.lower() == "approved" for review in reviews
        )

        inaccuracy_count += 1
        logging.info("\t Review data for commit %s amended", commit.commit_sha)

    logging.info(
        "Found %d commits that were incorrectly marked as push commits",
        inaccuracy_count,
    )
277
+
278
+
227
279
def upload_daily_metrics (
228
280
grafana_api_key : str ,
229
281
grafana_metrics_userid : str ,
@@ -271,6 +323,7 @@ def upload_daily_metrics(
271
323
272
324
273
325
def main () -> None :
326
+ github_auth = Auth .Token (os .environ ["GITHUB_TOKEN" ])
274
327
grafana_api_key = os .environ ["GRAFANA_API_KEY" ]
275
328
grafana_metrics_userid = os .environ ["GRAFANA_METRICS_USERID" ]
276
329
@@ -295,6 +348,9 @@ def main() -> None:
295
348
logging .info ("Querying for reviews of new commits." )
296
349
new_commit_info = query_for_reviews (new_commits , date_to_scrape )
297
350
351
+ logging .info ("Validating new commits." )
352
+ validate_push_commits (new_commit_info , github_auth )
353
+
298
354
logging .info ("Uploading metrics to Grafana." )
299
355
upload_daily_metrics (grafana_api_key , grafana_metrics_userid , new_commit_info )
300
356
0 commit comments