Skip to content

Commit fc2de36

Browse files
committed
scripts: add cherry-pick verification tool with fuzzy matching
This script compares a release branch against a source branch (e.g. master) to verify that all cherry-picked commits are unmodified. It first attempts fast matching using normalized patch hashes. If no exact match is found, it falls back to a fuzzy matching mechanism:
- Filters source commits by matching author and commit subject
- Compares normalized diffs using diff -u
- Selects the closest match based on line difference count

Useful for verifying cherry-picks or rebased commits during release processes. Supports scan and compare limits for performance.
1 parent b3eb9a3 commit fc2de36

File tree

1 file changed

+229
-0
lines changed

1 file changed

+229
-0
lines changed

scripts/fuzzy-match-release-branch.sh

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
#!/usr/bin/env bash
2+
3+
# Disabled `set -euo pipefail` to prevent premature exit on Linux due to
4+
# process substitution failures. Some commands (e.g. `diff <(...) <(...)`) can
5+
# fail if input is empty or pipes break, which is tolerated logic in this
6+
# script. macOS handles these cases more gracefully, but GNU diff in Linux does
7+
# not - leading to hard script exits mid-match.
8+
#
9+
# set -euo pipefail
10+
11+
# Branch names are mandatory; they stay empty until the argument parser
# fills them in.
SRC_BRANCH=''
RELEASE_BRANCH=''

# Upper bound on how many source-branch commits are scanned.
SRC_SCAN_LIMIT=1000
# Number of release commits to compare; 0 means "all of them".
RELEASE_LIMIT=0
15+
16+
#######################################
# Print the usage text and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   never returns; exits with status 1
#######################################
show_help() {
  printf '%s\n' \
    "Usage: $0 --source <branch> --release <branch> [--scan-limit N] [--limit N]" \
    "" \
    " --source Branch where cherry-picks originated (e.g. master)" \
    " --release Branch where cherry-picks landed (e.g. release-rc1)" \
    " --scan-limit Max commits to scan in source branch (default: 1000)" \
    " --limit Number of release commits to compare (default: all)"
  exit 1
}
25+
26+
#######################################
# Strip blob-id "index" header lines from a git patch so that the same
# change applied on two branches produces the same text.
#
# Two header shapes are removed:
#   index <old>..<new> <mode>   (file mode unchanged)
#   index <old>..<new>          (git omits the mode when the mode changes)
# A mode-less line with an all-zero side (file creation or deletion) is
# kept: its non-zero blob id depends only on the file content, and for
# binary files it is the only content fingerprint present in the patch.
#
# Arguments: none
# Inputs:    patch text on stdin
# Outputs:   normalized patch text on stdout
#######################################
normalize_patch() {
  sed \
    -e '/^index [0-9a-f]\{7,\}\.\.[0-9a-f]\{7,\} [0-9]\{6\}$/d' \
    -e '/^index 0\{7,\}\.\./b' \
    -e '/^index [0-9a-f]\{7,\}\.\.0\{7,\}$/b' \
    -e '/^index [0-9a-f]\{7,\}\.\.[0-9a-f]\{7,\}$/d'
}
29+
30+
# Parse command-line flags.
# Every value-taking flag must be followed by a value that does not itself
# start with '-'. The two limits must additionally be non-negative decimal
# integers: they are later evaluated arithmetically and handed to
# `head -n` / `git rev-list --max-count`, so free-form text is rejected here.
# All diagnostics go to stderr; show_help prints the usage and exits.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --source|--release|--scan-limit|--limit)
      if [[ -z "${2:-}" || "$2" =~ ^- ]]; then
        echo "Error: Missing value for argument $1" >&2
        show_help
      fi
      case "$1" in
        --source) SRC_BRANCH="$2" ;;
        --release) RELEASE_BRANCH="$2" ;;
        --scan-limit|--limit)
          if [[ ! "$2" =~ ^[0-9]+$ ]]; then
            echo "Error: $1 expects a non-negative integer, got '$2'" >&2
            show_help
          fi
          # Force base 10 so a value such as "08" is not read as octal.
          if [[ "$1" == "--scan-limit" ]]; then
            SRC_SCAN_LIMIT=$((10#$2))
          else
            RELEASE_LIMIT=$((10#$2))
          fi
          ;;
      esac
      shift 2
      ;;
    -h|--help) show_help ;;
    *) echo "Unknown argument: $1" >&2; show_help ;;
  esac
done

# Both branch names are mandatory.
if [[ -z "$SRC_BRANCH" || -z "$RELEASE_BRANCH" ]]; then
  echo "❌ Missing required arguments." >&2
  show_help
fi
54+
55+
#######################################
# Cross-platform hashing: print a hex digest of stdin.
#
# Tries md5sum, then md5, then `git hash-object --stdin`. Without a working
# tool the digest would be empty for every input, which would make all
# patches compare equal, so that case is reported as an error instead.
#
# Arguments: none
# Inputs:    data to hash on stdin
# Outputs:   hex digest on stdout; error message on stderr on failure
# Returns:   non-zero when no hashing tool is available
#######################################
hash_patch() {
  if command -v md5sum >/dev/null 2>&1; then
    md5sum | awk '{print $1}'
  elif command -v md5 >/dev/null 2>&1; then
    md5 | awk '{print $NF}'
  elif command -v git >/dev/null 2>&1; then
    # Digest differs from MD5 but is only ever compared with other digests
    # produced by this same function in the same run.
    git hash-object --stdin
  else
    echo "Error: no hashing tool found (need md5sum, md5 or git)" >&2
    return 1
  fi
}
63+
64+
# Report the effective configuration before doing any work.
# The release limit is shown wrapped in literal double quotes, with "ALL"
# standing in for a limit that is not greater than zero.
if [[ $RELEASE_LIMIT -gt 0 ]]; then
  release_limit_label=$(echo \"$RELEASE_LIMIT\")
else
  release_limit_label='"ALL"'
fi

echo "🔍 Preparing comparison:"
echo " Source branch : $SRC_BRANCH"
echo " Release branch : $RELEASE_BRANCH"
echo " Max source scan: $SRC_SCAN_LIMIT"
echo " Max release compare: $release_limit_label"
echo ""

# Best effort: a failed fetch must not stop the comparison.
echo "🔄 Fetching latest refs..."
git fetch --all --quiet || true
73+
74+
echo "📥 Collecting release commits..."

# Non-merge commits reachable from the release branch but not from the
# source branch. rev-list lists them newest first; a positive RELEASE_LIMIT
# keeps only that many of the newest ones.
release_rev_args=(--no-merges)
if [[ "$RELEASE_LIMIT" -gt 0 ]]; then
  release_rev_args+=("--max-count=$RELEASE_LIMIT")
fi

newest_first=()
while IFS= read -r sha; do
  if [[ -n "$sha" ]]; then
    newest_first+=("$sha")
  fi
done <<< "$(git rev-list "${release_rev_args[@]}" "$RELEASE_BRANCH" ^"$SRC_BRANCH")"

# Flip the order so the oldest selected commit is processed first.
RELEASE_COMMITS_ARRAY=()
for ((k = ${#newest_first[@]} - 1; k >= 0; k--)); do
  RELEASE_COMMITS_ARRAY+=("${newest_first[k]}")
done
RELEASE_COMMITS=$(printf '%s\n' "${RELEASE_COMMITS_ARRAY[@]}")

echo " → Found ${#RELEASE_COMMITS_ARRAY[@]} release commits."

if (( ${#RELEASE_COMMITS_ARRAY[@]} == 0 )); then
  echo "❌ No release commits found. Exiting."
  exit 1
fi
90+
91+
echo "📥 Collecting source commits..."

# The newest SRC_SCAN_LIMIT non-merge commits of the source branch, kept in
# rev-list order (newest first).
SRC_COMMITS_ARRAY=()
SRC_COMMITS=$(git rev-list --no-merges --max-count="$SRC_SCAN_LIMIT" "$SRC_BRANCH")
while IFS= read -r sha; do
  if [[ -n "$sha" ]]; then
    SRC_COMMITS_ARRAY+=("$sha")
  fi
done <<< "$SRC_COMMITS"

printf '%s\n' " → Found ${#SRC_COMMITS_ARRAY[@]} source commits to scan." ""
99+
100+
echo "⚙️ Indexing source commit metadata..."
echo " → Processing ${#SRC_COMMITS_ARRAY[@]} commits from $SRC_BRANCH..."

# Three arrays indexed in lock-step with SRC_COMMITS_ARRAY:
#   SRC_COMMIT_META[i]   "<subject>__<author name> <email>__<author date>"
#   SRC_PATCH_HASHES[i]  digest of the normalized patch
#   SRC_PATCHES[i]       normalized patch text (used for fuzzy diffing)
SRC_COMMIT_META=()
SRC_PATCH_HASHES=()
SRC_PATCHES=()

progress=0
for commit in "${SRC_COMMITS_ARRAY[@]}"; do
  progress=$((progress + 1))
  echo -ne "\r [$progress/${#SRC_COMMITS_ARRAY[@]}] Indexing $commit"

  # One git call yields the same key the separate subject/author/date
  # lookups would have produced.
  if ! meta_key=$(git log -1 --pretty=format:"%s__%an <%ae>__%ai" "$commit" 2>/dev/null); then
    # Keep the arrays aligned with SRC_COMMITS_ARRAY: later lookups use the
    # same index for all of them. The placeholders can never match a real
    # metadata key (which always contains "__") or a hex digest.
    SRC_COMMIT_META+=("")
    SRC_PATCH_HASHES+=("!unindexed")
    SRC_PATCHES+=("")
    continue
  fi

  # Normalized patch with leading whitespace stripped from every line.
  patch=$(git show --format= --unified=3 "$commit" | normalize_patch | sed 's/^[[:space:]]*//')
  patch_hash=$(echo "$patch" | hash_patch)

  SRC_COMMIT_META+=("$meta_key")
  SRC_PATCH_HASHES+=("$patch_hash")
  SRC_PATCHES+=("$patch")
done

echo -e "\n → Completed source indexing."
123+
124+
# Compare every release commit against the indexed source commits.
# Step 1: exact match on the normalized patch digest.
# Step 2: otherwise, among source commits with identical
#         subject + author + author date, report the one whose normalized
#         patch differs in the fewest lines, and print the diff.
TOTAL=${#RELEASE_COMMITS_ARRAY[@]}
MATCHED=0
UNMATCHED=0

for i in "${!RELEASE_COMMITS_ARRAY[@]}"; do
  rc_commit="${RELEASE_COMMITS_ARRAY[$i]}"
  rc_author=$(git log -1 --pretty=format:"%an <%ae>" "$rc_commit" 2>/dev/null) || continue
  rc_subject=$(git log -1 --pretty=format:"%s" "$rc_commit" 2>/dev/null) || continue
  rc_authordate=$(git log -1 --pretty=format:"%ai" "$rc_commit" 2>/dev/null) || continue
  meta_key="${rc_subject}__${rc_author}__${rc_authordate}"

  echo -ne "[$((i + 1))/$TOTAL] Checking ${rc_commit:0:7}... "

  rc_patch=$(git show --format= --unified=3 "$rc_commit" | normalize_patch | sed 's/^[[:space:]]*//')
  rc_patch_hash=$(echo "$rc_patch" | hash_patch)

  # --- Step 1: exact digest match -----------------------------------------
  found_exact_index=-1
  for j in "${!SRC_PATCH_HASHES[@]}"; do
    if [[ "${SRC_PATCH_HASHES[$j]}" == "$rc_patch_hash" ]]; then
      found_exact_index=$j
      break
    fi
  done

  if [[ $found_exact_index -ne -1 ]]; then
    found_exact="${SRC_COMMITS_ARRAY[$found_exact_index]}"
    meta_info="${SRC_COMMIT_META[$found_exact_index]}"
    # Split "<subject>__<author>__<date>" from the right: the date never
    # contains "__", whereas subjects frequently do (e.g. "__init__").
    src_authordate="${meta_info##*__}"
    rest="${meta_info%__*}"
    src_author="${rest##*__}"
    src_subject="${rest%__*}"
    echo "✅ MATCHES ${found_exact:0:7}"
    echo " ↪ RELEASE: $rc_commit"
    echo " Author : $rc_author"
    echo " Date : $rc_authordate"
    echo " Subject: \"$rc_subject\""
    echo " ↪ SOURCE : $found_exact"
    echo " Author : $src_author"
    echo " Date : $src_authordate"
    echo " Subject: \"$src_subject\""
    echo ""
    MATCHED=$((MATCHED + 1))
    continue
  fi

  echo "❌ NO MATCH"
  UNMATCHED=$((UNMATCHED + 1))

  echo "🔍 Unmatched Commit:"
  echo " ↪ Commit : $rc_commit"
  echo " ↪ Author : $rc_author"
  echo " ↪ Subject: \"$rc_subject\""
  echo ""

  # --- Step 2: fuzzy match among same-metadata source commits --------------
  best_score=""
  best_index=""
  fuzzy_candidates=0

  for j in "${!SRC_COMMIT_META[@]}"; do
    if [[ "${SRC_COMMIT_META[$j]}" == "$meta_key" ]]; then
      fuzzy_candidates=$((fuzzy_candidates + 1))
      # Count changed lines of the patch-vs-patch diff. The two file-header
      # lines are dropped by position rather than by pattern, so changed
      # lines that happen to start with "-- " or "++ " are still counted.
      # diff exits non-zero when the inputs differ; that is expected.
      score=$(diff -u <(echo "$rc_patch") <(echo "${SRC_PATCHES[$j]}") | tail -n +3 | grep -c '^[-+]')
      # The first candidate always becomes the current best, so best_index
      # is never left empty once a candidate exists.
      if [[ -z "$best_index" || "$score" -lt "$best_score" ]]; then
        best_score=$score
        best_index=$j
      fi
    fi
  done

  if [[ "$fuzzy_candidates" -eq 0 ]]; then
    echo "⚠️ No commits with matching author + subject + date in source branch."
  else
    match_commit="${SRC_COMMITS_ARRAY[$best_index]}"
    match_author=$(git log -1 --pretty=format:"%an <%ae>" "$match_commit")
    match_subject=$(git log -1 --pretty=format:"%s" "$match_commit")

    changed_files=$(git show --pretty="" --name-only "$rc_commit")
    # One array element per path so names containing spaces or glob
    # characters reach git diff intact.
    changed_files_array=()
    while IFS= read -r changed_path; do
      if [[ -n "$changed_path" ]]; then
        changed_files_array+=("$changed_path")
      fi
    done <<< "$changed_files"

    echo "🤔 Closest fuzzy match: $match_commit ($best_score changed lines from $fuzzy_candidates candidates)"
    echo " ↪ Author : $match_author"
    echo " ↪ Subject: \"$match_subject\""
    echo " ↪ Files Changed:"
    echo "$changed_files" | sed 's/^/ - /'
    echo ""

    echo "🔧 Check it manually (patch diff):"
    echo " git diff $match_commit $rc_commit -- \$(git show --pretty=\"\" --name-only $rc_commit)"
    echo ""

    echo "🔍 Diff between release and closest match:"
    echo "---------------------------------------------"
    # With no paths, `git diff A B --` would compare the entire trees, so
    # the diff is skipped when the release commit touches no files.
    if [[ ${#changed_files_array[@]} -gt 0 ]]; then
      git diff "$match_commit" "$rc_commit" -- "${changed_files_array[@]}" | sed 's/^/ /' || true
    fi
    echo "---------------------------------------------"
    echo ""
  fi

done
222+
223+
# Final tally of exact matches versus commits that needed manual review.
printf '%s\n' \
  "" \
  "🔎 Summary:" \
  " ✅ Matched : $MATCHED" \
  " ❌ Unmatched : $UNMATCHED" \
  " 📦 Total : $TOTAL"
229+

0 commit comments

Comments
 (0)