Skip to content

Commit a54906c

Browse files
[π˜€π—½π—Ώ] initial version
Created using spr 1.3.7
1 parent f9d32bc commit a54906c

File tree

2 files changed

+202
-0
lines changed

2 files changed

+202
-0
lines changed
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import sqlite3
2+
import os
3+
import subprocess
4+
import logging
5+
6+
# Default URL cloned by _clone_repository_if_not_present when no checkout exists.
REPOSITORY_URL = "https://github.com/llvm/llvm-project"
7+
# Default commit that is assigned index 1; later commits are counted from it.
FIRST_COMMIT_SHA = "f8f7f1b67c8ee5d81847955dc36fab86a6d129ad"
8+
9+
10+
def _clone_repository_if_not_present(
    repository_path: str, repository_url=REPOSITORY_URL
):
    """Clone ``repository_url`` into ``repository_path`` unless it exists.

    A checkout is considered present when ``repository_path`` contains a
    ``.git`` entry; in that case nothing is done.

    Args:
        repository_path: Directory that should hold the checkout. Its parent
            directory must already exist.
        repository_url: URL (or local path) to clone from.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status.
    """
    # Normalize before splitting: a bare relative name ("repo") has an empty
    # dirname (an invalid cwd) and a trailing separator ("/tmp/repo/") has an
    # empty basename (an invalid clone target).
    repository_path = os.path.abspath(repository_path)
    if os.path.exists(os.path.join(repository_path, ".git")):
        return
    logging.info("Cloning git repository.")
    subprocess.run(
        ["git", "clone", repository_url, os.path.basename(repository_path)],
        cwd=os.path.dirname(repository_path),
        check=True,
    )
    logging.info("Finished cloning git repository.")
21+
22+
23+
def _get_and_add_commit_index(
    commit_sha: str,
    repository_path: str,
    db_connection: sqlite3.Connection,
    first_commit_sha: str,
) -> int:
    """Index all commits up to ``commit_sha`` and return its index.

    Every commit reachable from ``commit_sha`` but not from the highest
    indexed commit in the ``commits`` table is inserted with consecutive
    indices. When the table is empty, counting starts at ``first_commit_sha``,
    which is stored with index 1.

    The inserts are not committed here; the caller owns the transaction.

    Args:
        commit_sha: Commit whose index is requested.
        repository_path: Path of the local git checkout.
        db_connection: Connection to a database with a
            ``commits(commit_sha, commit_index)`` table.
        first_commit_sha: Commit that receives index 1 when the table is
            empty.

    Returns:
        The index assigned to ``commit_sha``.

    Raises:
        subprocess.CalledProcessError: If a git command fails.
        ValueError: If ``commit_sha`` adds no commits on top of the highest
            indexed commit, so no index can be derived for it.
    """
    # Ensure the repository is up to date.
    subprocess.run(["git", "fetch"], cwd=repository_path, check=True)
    # Get the highest indexed commit so we only add commits newer than it.
    latest_commit_info = db_connection.execute(
        "SELECT commit_sha, commit_index FROM commits "
        "ORDER BY commit_index DESC LIMIT 1"
    ).fetchone()
    if latest_commit_info:
        latest_sha, latest_index = latest_commit_info
    else:
        latest_sha = first_commit_sha
        latest_index = 1
    # --format=%H prints only full SHAs, so commit subjects (which may not be
    # valid UTF-8) never have to be decoded or parsed. The trailing "--"
    # keeps git from interpreting the range as a path.
    log_output = subprocess.run(
        ["git", "log", "--format=%H", f"{latest_sha}..{commit_sha}", "--"],
        cwd=repository_path,
        stdout=subprocess.PIPE,
        check=True,
        text=True,
    )
    new_shas = log_output.stdout.split()
    # git log lists the newest commit first, so indices count down from the
    # top of the range.
    newest_index = latest_index + len(new_shas)
    commits_to_add = [
        (new_sha, newest_index - offset) for offset, new_sha in enumerate(new_shas)
    ]
    if not latest_commit_info:
        # Persist the starting commit too; otherwise a later lookup of it
        # would miss the table and hit an empty range.
        commits_to_add.append((first_commit_sha, 1))
    if not commits_to_add:
        raise ValueError(
            f"Commit {commit_sha} adds no commits on top of the latest "
            f"indexed commit {latest_sha}"
        )
    db_connection.executemany("INSERT INTO commits VALUES(?, ?)", commits_to_add)
    return commits_to_add[0][1]
57+
58+
59+
def get_commit_index(
    commit_sha: str,
    repository_path: str,
    db_connection: sqlite3.Connection,
    first_commit_sha=FIRST_COMMIT_SHA,
) -> int:
    """Return the index of ``commit_sha``, computing and storing it if new.

    The repository is cloned first if it is not present. An index already
    recorded in the ``commits`` table is returned directly; otherwise it is
    derived from git history and added to the table.

    Args:
        commit_sha: Commit to look up.
        repository_path: Path of the local git checkout.
        db_connection: Connection to the database holding the ``commits``
            table.
        first_commit_sha: Commit that indexing starts from when the table is
            empty.

    Returns:
        The index of ``commit_sha``.

    Raises:
        ValueError: If the table holds more than one row for ``commit_sha``.
    """
    _clone_repository_if_not_present(repository_path)

    # Look for an index that was already recorded for this commit.
    cursor = db_connection.execute(
        "SELECT * FROM commits WHERE commit_sha=?", (commit_sha,)
    )
    known_rows = cursor.fetchall()
    if len(known_rows) > 1:
        raise ValueError("Expected only one entry per commit SHA")
    if known_rows:
        return known_rows[0][1]

    # Unknown commit: derive its index from git and record it in the DB.
    return _get_and_add_commit_index(
        commit_sha, repository_path, db_connection, first_commit_sha
    )
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import unittest
2+
import tempfile
3+
import sqlite3
4+
import subprocess
5+
import os
6+
7+
import git_utils
8+
9+
10+
class GitUtilsTest(unittest.TestCase):
    """Tests for git_utils against a throwaway repository and database."""

    def setUp(self):
        self.db_file = tempfile.NamedTemporaryFile()
        self.db_connection = sqlite3.connect(self.db_file.name)
        self.db_connection.execute("CREATE TABLE commits(commit_sha, commit_index)")
        self.repository_path = tempfile.TemporaryDirectory()

    def tearDown(self):
        # Close the connection before the backing file goes away, then remove
        # the scratch repository so test runs do not leak directories.
        self.db_connection.close()
        self.db_file.close()
        self.repository_path.cleanup()

    def setup_repository(self, commit_count: int) -> list[str]:
        """Create ``commit_count`` commits and return their SHAs, oldest first."""
        repository = self.repository_path.name
        subprocess.run(["git", "init"], cwd=repository, check=True)
        for commit_index in range(commit_count):
            with open(os.path.join(repository, str(commit_index)), "w") as commit_file:
                commit_file.write("test")
            subprocess.run(["git", "add", "--all"], cwd=repository, check=True)
            # Set the identity with -c so both author and committer are
            # defined even when the machine has no global git configuration.
            subprocess.run(
                [
                    "git",
                    "-c",
                    "user.name=test",
                    "-c",
                    "user.email=test@example.com",
                    "commit",
                    "-m",
                    "message",
                ],
                cwd=repository,
                check=True,
            )
        log_process = subprocess.run(
            ["git", "log", "--format=%H"],
            cwd=repository,
            stdout=subprocess.PIPE,
            check=True,
            text=True,
        )
        # git log prints the newest commit first; callers want oldest first.
        commit_shas = log_process.stdout.split()
        commit_shas.reverse()
        return commit_shas

    def test_clone_repository(self):
        self.setup_repository(5)
        utils_repo_folder = tempfile.TemporaryDirectory()
        self.addCleanup(utils_repo_folder.cleanup)
        utils_repo_path = os.path.join(utils_repo_folder.name, "repo")
        git_utils._clone_repository_if_not_present(
            utils_repo_path, self.repository_path.name
        )
        log_process = subprocess.run(
            ["git", "log", "--format=%H", "--max-count=5"],
            cwd=utils_repo_path,
            stdout=subprocess.PIPE,
            check=True,
            text=True,
        )
        self.assertEqual(len(log_process.stdout.split()), 5)

    def test_get_index_from_db(self):
        self.setup_repository(1)
        self.db_connection.execute(
            "INSERT INTO commits VALUES(?, ?)",
            ("f3939dc5093826c05f2a78ce1b0af769cd48fdab", 5),
        )
        self.assertEqual(
            git_utils.get_commit_index(
                "f3939dc5093826c05f2a78ce1b0af769cd48fdab",
                self.repository_path.name,
                self.db_connection,
            ),
            5,
        )

    def test_get_first_commit_from_git(self):
        commit_shas = self.setup_repository(2)
        self.assertEqual(
            git_utils.get_commit_index(
                commit_shas[1],
                self.repository_path.name,
                self.db_connection,
                commit_shas[0],
            ),
            2,
        )

    def test_get_index_from_git(self):
        commit_shas = self.setup_repository(3)
        self.db_connection.execute(
            "INSERT INTO commits VALUES(?, ?)", (commit_shas[1], 3)
        )
        self.assertEqual(
            git_utils.get_commit_index(
                commit_shas[2], self.repository_path.name, self.db_connection
            ),
            4,
        )

    def test_get_index_from_git_multiple_commits(self):
        commit_shas = self.setup_repository(4)
        self.db_connection.execute(
            "INSERT INTO commits VALUES(?, ?)", (commit_shas[1], 3)
        )
        self.assertEqual(
            git_utils.get_commit_index(
                commit_shas[3], self.repository_path.name, self.db_connection
            ),
            5,
        )
        # The previous call indexed this commit, so it is served from the DB.
        self.assertEqual(
            git_utils.get_commit_index(
                commit_shas[2], self.repository_path.name, self.db_connection
            ),
            4,
        )

0 commit comments

Comments
(0)