From dabfdef39ad1844edeac4d455f5a3f271c734579 Mon Sep 17 00:00:00 2001 From: Brett Mastbergen Date: Wed, 8 Oct 2025 17:17:35 -0400 Subject: [PATCH] [CKC] Add CVE verification with --check-cves option Adds ability to verify that CVE references in PR commit messages correctly match the upstream commits they reference. Uses the kernel vulnerabilities database to cross-check CVE assignments against upstream commit hashes. The --check-cves flag enables validation that detects four conditions: mismatched CVE assignments between PR and upstream commits, PR commits that omit the CVE assigned to their upstream commit, CVE references to upstream commits with no CVE assignment, and failures accessing the vulnerabilities database. Output format matches existing checker patterns with support for both plain text and markdown modes. --- check_kernel_commits.py | 184 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 179 insertions(+), 5 deletions(-) diff --git a/check_kernel_commits.py b/check_kernel_commits.py index 9e92bb7..57e11a6 100644 --- a/check_kernel_commits.py +++ b/check_kernel_commits.py @@ -5,6 +5,8 @@ import re import sys import textwrap +import os +from typing import Optional def run_git(repo, args): """Run a git command in the given repository and return its output as a string.""" @@ -50,6 +52,7 @@ def find_fixes_in_mainline(repo, pr_branch, upstream_ref, hash_): """ Return unique commits in upstream_ref that have Fixes: in their message, case-insensitive. Start from 12 chars and work down to 6, but do not include duplicates if already found at a longer length. 
+ Returns a list of tuples: (full_hash, display_string) """ results = [] # Get all commits with 'Fixes:' in the message @@ -57,7 +60,7 @@ def find_fixes_in_mainline(repo, pr_branch, upstream_ref, hash_): 'log', upstream_ref, '--grep', 'Fixes:', '-i', '--format=%H %h %s (%an)%x0a%B%x00' ]).strip() if not output: - return "" + return [] # Each commit is separated by a NUL character and a newline commits = output.split('\x00\x0a') # Prepare hash prefixes from 12 down to 6 @@ -78,11 +81,11 @@ def find_fixes_in_mainline(repo, pr_branch, upstream_ref, hash_): for prefix in hash_prefixes: if m.group(1).lower().startswith(prefix.lower()): if not commit_exists_in_branch(repo, pr_branch, full_hash): - results.append(' '.join(header.split()[1:])) + results.append((full_hash, ' '.join(header.split()[1:]))) break else: continue - return "\n".join(results) + return results def commit_exists_in_branch(repo, pr_branch, upstream_hash_): """ @@ -104,6 +107,35 @@ def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''): break_on_hyphens=False) return wrapper.fill(text) +def extract_cve_from_message(msg): + """Extract CVE reference from commit message. Returns CVE ID or None. + Only matches 'cve CVE-2025-12345', ignores 'cve-bf' and 'cve-pre' variants.""" + match = re.search(r'(? tuple[bool, Optional[str]]: + """ + Run the cve_search script from the vulns repo. + Returns (success, output_message). 
+ """ + cve_search_path = os.path.join(vulns_repo, 'scripts', 'cve_search') + if not os.path.exists(cve_search_path): + raise RuntimeError(f"cve_search script not found at {cve_search_path}") + + env = os.environ.copy() + env['CVEKERNELTREE'] = kernel_repo + + result = subprocess.run([cve_search_path, query], + text=True, + capture_output=True, + check=False, + env=env) + + # cve_search outputs results to stdout + return result.returncode == 0, result.stdout.strip() + def main(): parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.") parser.add_argument("--repo", help="Path to the git repo", required=True) @@ -111,10 +143,39 @@ def main(): parser.add_argument("--base_branch", help="Name of the base branch", required=True) parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments") parser.add_argument("--upstream-ref", default="origin/kernel-mainline", help="Reference to upstream mainline branch (default: origin/kernel-mainline)") + parser.add_argument("--check-cves", action='store_true', help="Check that CVE references in commit messages match upstream commit hashes") + parser.add_argument("--vulns-dir", default="../vulns", help="Path to the kernel vulnerabilities repo (default: ../vulns)") args = parser.parse_args() upstream_ref = args.upstream_ref + # Set up vulns repo path if CVE checking is enabled + vulns_repo = None + if args.check_cves: + vulns_repo = args.vulns_dir + vulns_repo_url = "https://git.kernel.org/pub/scm/linux/security/vulns.git" + + if os.path.exists(vulns_repo): + # Repository exists, update it with git pull + try: + run_git(vulns_repo, ['pull']) + except RuntimeError as e: + print(f"WARNING: Failed to update vulns repo: {e}") + print("Continuing with existing repository...") + else: + # Repository doesn't exist, clone it + try: + result = subprocess.run(['git', 'clone', vulns_repo_url, vulns_repo], + text=True, + capture_output=True, 
+ check=False) + if result.returncode != 0: + print(f"ERROR: Failed to clone vulns repo: {result.stderr}") + sys.exit(1) + except Exception as e: + print(f"ERROR: Failed to clone vulns repo: {e}") + sys.exit(1) + # Validate that all required refs exist before continuing missing_refs = [] for refname, refval in [('upstream reference', upstream_ref), @@ -168,8 +229,34 @@ def main(): fixes = find_fixes_in_mainline(args.repo, args.pr_branch, upstream_ref, uhash) if fixes: any_findings = True + + # Check CVEs for bugfix commits if enabled + fix_cves = {} + if args.check_cves: + for fix_hash, fix_display in fixes: + try: + success, cve_output = run_cve_search(vulns_repo, args.repo, fix_hash) + if success: + # Parse the CVE from the result + match = re.search(r'(CVE-\d{4}-\d+)\s+is assigned to git id', cve_output) + if match: + bugfix_cve = match.group(1) + fix_cves[fix_hash] = bugfix_cve + except (RuntimeError, subprocess.SubprocessError) as e: + # Log a warning instead of silently ignoring errors when checking bugfix CVEs + print(f"Warning: Failed to check CVE for bugfix commit {fix_hash}: {e}", file=sys.stderr) + + # Build the fixes display text with CVE info + fixes_lines = [] + for fix_hash, display_str in fixes: + if fix_hash in fix_cves: + fixes_lines.append(f"{display_str} ({fix_cves[fix_hash]})") + else: + fixes_lines.append(display_str) + fixes_text = "\n".join(fixes_lines) + if args.markdown: - fixes_block = " " + fixes.replace("\n", "\n ") + fixes_block = " " + fixes_text.replace("\n", "\n ") out_lines.append( f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n" f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n" @@ -185,10 +272,97 @@ def main(): subsequent_indent=' ' * len(prefix)) # spaces for '[FIXES] ' ) out_lines.append("") # blank line after 'Fixes tags:' - for line in fixes.splitlines(): + for line in fixes_text.splitlines(): out_lines.append(' ' + line) out_lines.append("") # blank line + # Check CVE if 
enabled + if args.check_cves: + cve_id = extract_cve_from_message(msg) + + # Check if the upstream commit has a CVE associated with it + try: + success, cve_output = run_cve_search(vulns_repo, args.repo, uhash) + if success: + # Parse the output to get the CVE from the result + # Expected format: "CVE-2024-35962 is assigned to git id 65acf6e0501ac8880a4f73980d01b5d27648b956" + match = re.search(r'(CVE-\d{4}-\d+)\s+is assigned to git id', cve_output) + if match: + found_cve = match.group(1) + + if cve_id: + # PR commit has a CVE reference - check if it matches + if found_cve != cve_id: + any_findings = True + if args.markdown: + out_lines.append( + f"- ❌ PR commit `{pr_commit_desc}` references `{cve_id}` but \n" + f" upstream commit `{short_uhash}` is associated with `{found_cve}`\n" + ) + else: + prefix = "[CVE-MISMATCH] " + header = (f"{prefix}PR commit {pr_commit_desc} references {cve_id} but " + f"upstream commit {short_uhash} is associated with {found_cve}") + out_lines.append( + wrap_paragraph(header, width=80, initial_indent='', + subsequent_indent=' ' * len(prefix)) + ) + out_lines.append("") # blank line + else: + # PR commit doesn't reference a CVE, but upstream has one + any_findings = True + if args.markdown: + out_lines.append( + f"- ⚠️ PR commit `{pr_commit_desc}` does not reference a CVE but \n" + f" upstream commit `{short_uhash}` is associated with `{found_cve}`\n" + ) + else: + prefix = "[CVE-MISSING] " + header = (f"{prefix}PR commit {pr_commit_desc} does not reference a CVE but " + f"upstream commit {short_uhash} is associated with {found_cve}") + out_lines.append( + wrap_paragraph(header, width=80, initial_indent='', + subsequent_indent=' ' * len(prefix)) + ) + out_lines.append("") # blank line + else: + # The upstream commit has no CVE assigned + if cve_id: + # PR commit claims a CVE but upstream has none + any_findings = True + if args.markdown: + out_lines.append( + f"- ❌ PR commit `{pr_commit_desc}` references `{cve_id}` but \n" + f" 
upstream commit `{short_uhash}` has no CVE assigned\n" + ) + else: + prefix = "[CVE-NOTFOUND] " + header = (f"{prefix}PR commit {pr_commit_desc} references {cve_id} but " + f"upstream commit {short_uhash} has no CVE assigned") + out_lines.append( + wrap_paragraph(header, width=80, initial_indent='', + subsequent_indent=' ' * len(prefix)) + ) + out_lines.append("") # blank line + except (subprocess.SubprocessError, OSError) as e: + # Error running cve_search + if cve_id: + any_findings = True + if args.markdown: + out_lines.append( + f"- ⚠️ PR commit `{pr_commit_desc}` references `{cve_id}` but \n" + f" failed to verify: {e}\n" + ) + else: + prefix = "[CVE-ERROR] " + header = (f"{prefix}PR commit {pr_commit_desc} references {cve_id} but " + f"failed to verify: {e}") + out_lines.append( + wrap_paragraph(header, width=80, initial_indent='', + subsequent_indent=' ' * len(prefix)) + ) + out_lines.append("") # blank line + if any_findings: if args.markdown: print("## :mag: Upstream Linux Kernel Commit Check\n")