@@ -9,12 +9,7 @@
 GRAFANA_URL = (
     "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write"
 )
-
-# Path to checked out llvm/llvm-project repository
-REPOSITORY_PATH = "/data/llvm-project"
-
-# Path to record of most recently processed commits
-DATA_PATH = "/data/recent_commits.csv"
+REPOSITORY_URL = "https://github.com/llvm/llvm-project.git"
 
 # Number of days to look back for new commits
 # We allow some buffer time between when a commit is made and when it is queried
@@ -61,99 +56,33 @@ class LLVMCommitInfo:
   is_approved: bool = False
 
 
-def read_past_commits() -> list[list[str]]:
-  """Read recently scraped commits from the data path.
-
-  Returns:
-    List of commits that have been scraped.
-  """
-  # If the data path doesn't exist, we haven't scraped any commits yet.
-  if not os.path.exists(DATA_PATH):
-    logging.warning(
-        "Data path %s does not exist. No past commits found.", DATA_PATH
-    )
-    return []
-
-  # Read the past commits from the data path
-  with open(DATA_PATH, "r") as f:
-    f.readline()  # Skip header
-    rows = f.readlines()
-  commit_history = [row.strip().split(",") for row in rows if row.strip()]
-  return commit_history
-
-
-def record_new_commits(new_commits: list[LLVMCommitInfo]) -> None:
-  """Record newly scraped commits to the data path.
-
-  Args:
-    new_commits: List of commits to record.
-
-  Returns:
-    None
-  """
-  with open(DATA_PATH, "w") as f:
-
-    # Write CSV header
-    f.write(
-        ",".join([
-            "commit_sha",
-            "commit_datetime",
-            "has_pull_request",
-            "pull_request_number",
-            "is_reviewed",
-            "is_approved",
-        ])
-        + "\n"
-    )
-
-    # We want the newest commit as the last entry, so iterate backwards
-    for i in range(len(new_commits) - 1, -1, -1):
-      commit_info = new_commits[i]
-      record = ",".join([
-          commit_info.commit_sha,
-          commit_info.commit_datetime.astimezone(
-              datetime.timezone.utc
-          ).isoformat(),
-          str(commit_info.has_pull_request),
-          str(commit_info.pr_number),
-          str(commit_info.is_reviewed),
-          str(commit_info.is_approved),
-      ])
-      f.write(f"{record}\n")
-
-
 def scrape_new_commits_by_date(
-    last_known_commit: str, target_datetime: datetime.datetime
+    target_datetime: datetime.datetime,
 ) -> list[git.Commit]:
   """Scrape new commits on a given date.
 
   Args:
-    last_known_commit: The last known scraped commit.
     target_datetime: The date to scrape for new commits.
 
   Returns:
     List of new commits made on the given date.
   """
-  # Pull any new commits into local repository
-  repo = git.Repo(REPOSITORY_PATH)
-  repo.remotes.origin.pull()
+  # Clone repository to current working directory
+  repo = git.Repo.clone_from(
+      url=REPOSITORY_URL,
+      to_path="./llvm-project",
+  )
 
   # Scrape for new commits
   # iter_commits() yields commits in reverse chronological order
   new_commits = []
   for commit in repo.iter_commits():
-    # Skip commits that are too new
+    # Skip commits that don't match the target date
     committed_datetime = commit.committed_datetime.astimezone(
         datetime.timezone.utc
     )
-    if committed_datetime.date() > target_datetime.date():
+    if committed_datetime.date() != target_datetime.date():
      continue
-    # Stop scraping if the commit is older than the target date
-    if committed_datetime.date() < target_datetime.date():
-      break
-    # Stop scraping if we've already recorded this commit
-    if commit.hexsha == last_known_commit:
-      break
 
     new_commits.append(commit)
 
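For reference, a minimal standalone sketch of the new scraping flow, assuming GitPython is installed and the machine has network access to GitHub. The early break once the walk passes the target day is an assumed optimization, not part of the change above:

import datetime

import git

REPOSITORY_URL = "https://github.com/llvm/llvm-project.git"


def scrape_new_commits_by_date(
    target_datetime: datetime.datetime,
) -> list[git.Commit]:
  # Clone a fresh copy instead of pulling into a persistent checkout.
  # Note: clone_from fails if ./llvm-project already exists.
  repo = git.Repo.clone_from(url=REPOSITORY_URL, to_path="./llvm-project")
  new_commits = []
  # iter_commits() yields commits in reverse chronological order.
  for commit in repo.iter_commits():
    committed = commit.committed_datetime.astimezone(datetime.timezone.utc)
    if committed.date() > target_datetime.date():
      continue  # Newer than the target day; keep walking back.
    if committed.date() < target_datetime.date():
      break  # Assumption: remaining history predates the target day.
    new_commits.append(commit)
  return new_commits


if __name__ == "__main__":
  target = datetime.datetime.now(
      datetime.timezone.utc
  ) - datetime.timedelta(days=1)
  print(len(scrape_new_commits_by_date(target)))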
@@ -274,20 +203,15 @@ def main() -> None:
   grafana_api_key = os.environ["GRAFANA_API_KEY"]
   grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"]
 
-  logging.info("Reading recently processed commits.")
-  recorded_commits = read_past_commits()
-
-  last_known_commit = recorded_commits[-1][0] if recorded_commits else ""
-
-  # Scrape new commits, if any
+  # Scrape new commits
   date_to_scrape = datetime.datetime.now(
       datetime.timezone.utc
   ) - datetime.timedelta(days=LOOKBACK_DAYS)
   logging.info(
-      "Scraping checked out llvm/llvm-project for new commits on %s",
+      "Cloning and scraping llvm/llvm-project for new commits on %s",
       date_to_scrape.strftime("%Y-%m-%d"),
   )
-  new_commits = scrape_new_commits_by_date(last_known_commit, date_to_scrape)
+  new_commits = scrape_new_commits_by_date(date_to_scrape)
   if not new_commits:
     logging.info("No new commits found. Exiting.")
     return
@@ -298,11 +222,7 @@ def main() -> None:
   logging.info("Uploading metrics to Grafana.")
   upload_daily_metrics(grafana_api_key, grafana_metrics_userid, new_commit_info)
 
-  logging.info("Recording new commits.")
-  record_new_commits(new_commit_info)
-
 
 if __name__ == "__main__":
   logging.basicConfig(level=logging.INFO)
   main()
-