9
9
# Influx-protocol write endpoint on Grafana Cloud (see the URL path).
# NOTE(review): presumably the target of the metrics upload -- confirm against
# upload_daily_metrics.
GRAFANA_URL = (
    "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write"
)
# Endpoint that GitHub GraphQL queries are POSTed to.
GITHUB_GRAPHQL_API_URL = "https://api.github.com/graphql"
# Git URL of the llvm-project repository.
REPOSITORY_URL = "https://github.com/llvm/llvm-project.git"

# How many commits to query the GitHub GraphQL API for at a time.
# Querying too many commits at once often leads to the call failing.
GITHUB_API_BATCH_SIZE = 75
18
+
14
19
# Number of days to look back for new commits
15
20
# We allow some buffer time between when a commit is made and when it is queried
16
21
# for reviews. This is to allow time for any events to propagate in the GitHub
44
49
AND JSON_VALUE(pr_event.payload, '$.pull_request.merge_commit_sha') IS NOT NULL
45
50
"""
46
51
52
# Template GraphQL subquery to check if a commit has an associated pull request
# and whether that pull request has been reviewed and approved.
#
# The template is filled in with str.format(commit_sha=...), which is why the
# literal GraphQL braces are doubled ({{ and }}).
# Each subquery is aliased as "commit_<sha>" so that several of them can be
# placed in one query and each result can be matched back to its commit.
# Only the first associated pull request is requested (first: 1); its list is
# exposed under the alias "pullRequest".
COMMIT_GRAPHQL_SUBQUERY_TEMPLATE = """
commit_{commit_sha}:
  object(oid:"{commit_sha}") {{
    ... on Commit {{
      associatedPullRequests(first: 1) {{
        totalCount
        pullRequest: nodes {{
          number
          reviewDecision
        }}
      }}
    }}
  }}
"""
68
+
47
69
48
70
@dataclasses .dataclass
49
71
class LLVMCommitInfo :
@@ -153,6 +175,88 @@ def query_for_reviews(
153
175
return list (new_commits .values ())
154
176
155
177
178
def validate_push_commits(
    new_commits: list[LLVMCommitInfo], github_token: str
) -> None:
    """Validate that push commits don't have a pull request.

    To address lossiness of data from GitHub Archive BigQuery, we check each
    commit to see if it actually has an associated pull request. Commits that
    turn out to have one are amended in place: has_pull_request, pr_number,
    is_reviewed and is_approved are overwritten with the API's answer.

    Validation is best effort. A batch whose request is rejected, or whose
    response carries no repository data, is logged and skipped; the commits
    in that batch are left unchanged.

    Args:
      new_commits: List of commits to validate.
      github_token: The access token to use with the GitHub GraphQL API.

    Returns:
      None

    Raises:
      requests.exceptions.RequestException: If a request cannot be completed
        at all (for example a connection error or a timeout).
    """
    # Upper bound on how long a single API call may take. Without a timeout
    # a stalled connection would block the whole job indefinitely.
    request_timeout_seconds = 60

    # Get all push commits from new commits and form their subqueries
    potential_push_commits = {
        commit.commit_sha: commit
        for commit in new_commits
        if not commit.has_pull_request
    }
    commit_subqueries = [
        COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit_sha)
        for commit_sha in potential_push_commits
    ]
    logging.info(
        "Found %d potential push commits", len(potential_push_commits)
    )

    # Query GitHub GraphQL API for pull requests associated with push commits
    # We query in batches as large queries often fail
    api_commit_data = {}
    query_template = """
    query {
      repository(owner:"llvm", name:"llvm-project"){
        %s
      }
    }
    """
    # Ceiling division: no request at all when there is nothing to validate,
    # and no trailing empty batch when the count is an exact multiple of the
    # batch size (an empty repository{} selection is not a valid query).
    num_batches = -(-len(commit_subqueries) // GITHUB_API_BATCH_SIZE)
    logging.info("Querying GitHub GraphQL API in %d batches", num_batches)
    for i in range(num_batches):
        subquery_batch = commit_subqueries[
            i * GITHUB_API_BATCH_SIZE : (i + 1) * GITHUB_API_BATCH_SIZE
        ]
        query = query_template % "".join(subquery_batch)

        logging.info(
            "Querying batch %d of %d (%d commits)",
            i + 1,
            num_batches,
            len(subquery_batch),
        )
        response = requests.post(
            url=GITHUB_GRAPHQL_API_URL,
            headers={
                "Authorization": f"bearer {github_token}",
            },
            json={"query": query},
            timeout=request_timeout_seconds,
        )
        if response.status_code < 200 or response.status_code >= 300:
            logging.error(
                "Failed to query GitHub GraphQL API: %s", response.text
            )
            # The body of a failed call has no usable data; skip this batch
            # instead of crashing while trying to index into it.
            continue

        try:
            payload = response.json()
        except ValueError:
            logging.error(
                "GitHub GraphQL API returned a non-JSON body: %s",
                response.text,
            )
            continue

        # GraphQL reports query-level failures with HTTP 200 and an "errors"
        # member; "data" (or "repository") may then be missing or null.
        if not isinstance(payload, dict):
            payload = {}
        if payload.get("errors"):
            logging.error(
                "GitHub GraphQL API reported errors: %s", payload["errors"]
            )
        repository_data = (payload.get("data") or {}).get("repository")
        if not repository_data:
            continue
        api_commit_data.update(repository_data)

    amend_count = 0
    for alias, data in api_commit_data.items():
        # A null or empty entry means the object could not be resolved as a
        # commit, so there is nothing to verify for it.
        if not data:
            continue

        # Verify that push commit has no pull requests
        associated = data["associatedPullRequests"]
        if associated["totalCount"] == 0 or not associated["pullRequest"]:
            continue

        # Amend fields with new data from API. Result keys are the subquery
        # aliases, i.e. the commit SHA prefixed with "commit_".
        pull_request = associated["pullRequest"][0]
        commit_info = potential_push_commits[alias.removeprefix("commit_")]
        commit_info.has_pull_request = True
        commit_info.pr_number = pull_request["number"]
        commit_info.is_reviewed = pull_request["reviewDecision"] is not None
        commit_info.is_approved = pull_request["reviewDecision"] == "APPROVED"
        amend_count += 1

    logging.info("Amended %d commits", amend_count)
258
+
259
+
156
260
def upload_daily_metrics (
157
261
grafana_api_key : str ,
158
262
grafana_metrics_userid : str ,
@@ -200,6 +304,7 @@ def upload_daily_metrics(
200
304
201
305
202
306
def main () -> None :
307
+ github_token = os .environ ["GITHUB_TOKEN" ]
203
308
grafana_api_key = os .environ ["GRAFANA_API_KEY" ]
204
309
grafana_metrics_userid = os .environ ["GRAFANA_METRICS_USERID" ]
205
310
@@ -219,6 +324,9 @@ def main() -> None:
219
324
logging .info ("Querying for reviews of new commits." )
220
325
new_commit_info = query_for_reviews (new_commits , date_to_scrape )
221
326
327
+ logging .info ("Validating push commits." )
328
+ validate_push_commits (new_commit_info , github_token )
329
+
222
330
logging .info ("Uploading metrics to Grafana." )
223
331
upload_daily_metrics (grafana_api_key , grafana_metrics_userid , new_commit_info )
224
332
0 commit comments