Merged — changes from 2 commits
19 changes: 15 additions & 4 deletions src/sentry/utils/committers.py
@@ -26,6 +26,9 @@
 from sentry.utils.hashlib import hash_values

 PATH_SEPARATORS = frozenset(["/", "\\"])
+# Limit the number of commits to batch in a single query to avoid query timeouts
+# from large IN clauses combined with complex LIKE conditions
+COMMIT_BATCH_SIZE = 50


 def tokenize_path(path: str) -> Iterator[str]:
@@ -96,11 +99,19 @@ def _get_commit_file_changes(
     # build a single query to get all of the commit file that might match the first n frames
     path_query = reduce(operator.or_, (Q(filename__iendswith=path) for path in filenames))

-    commit_file_change_matches = CommitFileChange.objects.filter(
-        path_query, commit_id__in=[c.id for c in commits]
-    )
+    # Batch commits to avoid query timeouts from large IN clauses
+    # combined with complex LIKE conditions
+    all_file_changes: list[CommitFileChange] = []
+    commit_ids = [c.id for c in commits]
+
+    for i in range(0, len(commit_ids), COMMIT_BATCH_SIZE):
[Inline review thread on the for loop above — the suggested chunked version is sketched after the diff]
Member: Nit: chunked can batch these into groups for you
Member: agree - chunked is much cleaner

+        batch_commit_ids = commit_ids[i : i + COMMIT_BATCH_SIZE]
+        commit_file_change_matches = CommitFileChange.objects.filter(
+            path_query, commit_id__in=batch_commit_ids
+        )
+        all_file_changes.extend(list(commit_file_change_matches))

-    return list(commit_file_change_matches)
+    return all_file_changes


 def _match_commits_paths(
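A minimal, standalone sketch of the batching arithmetic the new loop performs (the counts are hypothetical, not from this PR): 120 matching commit ids become three IN lists capped at COMMIT_BATCH_SIZE.

    # Hypothetical input: 120 matching commit ids
    # (the real code builds this list as [c.id for c in commits]).
    COMMIT_BATCH_SIZE = 50

    commit_ids = list(range(120))
    batches = [
        commit_ids[i : i + COMMIT_BATCH_SIZE]
        for i in range(0, len(commit_ids), COMMIT_BATCH_SIZE)
    ]
    # Three queries instead of one, each with at most 50 ids in its IN clause.
    assert [len(b) for b in batches] == [50, 50, 20]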
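And a minimal sketch of the refactor the reviewers suggest, assuming Sentry's chunked helper (sentry.utils.iterators.chunked(seq, size), which yields successive size-limited chunks) and an assumed import path for CommitFileChange — neither is taken from this diff. The per-query cap is unchanged; only the manual index arithmetic goes away.

    from sentry.models.commitfilechange import CommitFileChange  # import path assumed
    from sentry.utils.iterators import chunked  # assumed helper, per the review comments


    def _get_commit_file_changes_chunked(commits, path_query):
        # Same batching as the merged loop: each query's IN clause still
        # sees at most COMMIT_BATCH_SIZE commit ids.
        all_file_changes: list[CommitFileChange] = []
        for batch_commit_ids in chunked([c.id for c in commits], COMMIT_BATCH_SIZE):
            all_file_changes.extend(
                CommitFileChange.objects.filter(path_query, commit_id__in=batch_commit_ids)
            )
        return all_file_changes

Either shape issues ceil(len(commits) / COMMIT_BATCH_SIZE) queries; the chunked version simply reads more clearly.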