Skip to content

Commit 1144d0b

Browse files
committed
Add script to validate upstream references in PR branch commits
This script scans each commit in a PR branch for references to upstream Linux kernel commits and validates them. For every referenced upstream commit, the script checks that the commit exists in mainline and reports whether any mainline commit names it in a Fixes: tag (i.e., whether a follow-up fix exists upstream). Usage: `python3 check_kernel_commits.py <repo_path> <pr_branch> <base_branch> [--markdown]`. By default, the output is formatted for terminal display; pass the --markdown flag to format it for GitHub PR comments.
1 parent ab098bb commit 1144d0b

File tree

1 file changed

+174
-0
lines changed

1 file changed

+174
-0
lines changed

check_kernel_commits.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import subprocess
5+
import re
6+
import sys
7+
import textwrap
8+
9+
def run_git(repo, args):
    """Execute ``git -C <repo> <args...>`` and hand back its standard output.

    The command is run without a shell; stdout and stderr are captured as text.

    Raises:
        RuntimeError: if git exits with a non-zero status. The message holds
            the space-joined arguments followed by git's stderr.
    """
    command = ['git', '-C', repo] + args
    completed = subprocess.run(command, text=True, capture_output=True, check=False)
    if completed.returncode == 0:
        return completed.stdout
    raise RuntimeError(f"Git command failed: {' '.join(args)}\n{completed.stderr}")
15+
16+
def ref_exists(repo, ref):
    """Tell whether ``git rev-parse --verify --quiet <ref>`` succeeds in ``repo``.

    Returns True when the command exits cleanly and False when ``run_git``
    raises RuntimeError for it.
    """
    verify_args = ['rev-parse', '--verify', '--quiet', ref]
    try:
        run_git(repo, verify_args)
    except RuntimeError:
        return False
    else:
        return True
23+
24+
def get_pr_commits(repo, pr_branch, base_branch):
    """List the commit SHAs selected by ``git rev-list <base_branch>..<pr_branch>``.

    The SHAs are returned one per list element, in the order git printed them.
    An empty list is returned when git prints nothing.
    """
    revision_range = f'{base_branch}..{pr_branch}'
    listing = run_git(repo, ['rev-list', revision_range])
    return listing.strip().splitlines()
28+
29+
def get_commit_message(repo, sha):
    """Fetch the raw message (``%B``) of the single commit ``sha`` via ``git log``."""
    log_args = ['log', '-n', '1', '--format=%B', sha]
    return run_git(repo, log_args)
32+
33+
def get_short_hash_and_subject(repo, sha):
    """Look up ``sha`` and return ``(abbreviated_hash, subject)``.

    git is asked to emit both fields separated by a NUL byte so that the
    subject can be split off no matter what characters it contains.
    """
    log_args = ['log', '-n', '1', '--format=%h%x00%s', sha]
    short_hash, subject = run_git(repo, log_args).strip().split('\x00', 1)
    return short_hash, subject
38+
39+
def hash_exists_in_mainline(repo, upstream_ref, hash_):
    """
    Check whether ``git merge-base --is-ancestor <hash_> <upstream_ref>`` succeeds.

    Returns True when the command exits cleanly (hash_ is an ancestor of
    upstream_ref) and False whenever run_git raises RuntimeError for it.
    """
    ancestry_args = ['merge-base', '--is-ancestor', hash_, upstream_ref]
    try:
        run_git(repo, ancestry_args)
    except RuntimeError:
        return False
    else:
        return True
48+
49+
def find_fixes_in_mainline(repo, upstream_ref, hash_):
    """
    Return unique commits in upstream_ref that have Fixes: <N chars of hash_> in their
    message, case-insensitive.

    Prefixes from 12 characters down to 6 are considered. Commits matching a longer
    prefix are listed before commits that only match a shorter one, and no commit is
    listed twice.

    The history is searched only once: the pattern "Fixes: <6 chars>" is a substring of
    every longer "Fixes: <N chars>" pattern, so the shortest search already returns every
    commit a longer search could return. The ranking by prefix length is then done in
    Python on the messages of those few candidates instead of re-walking the history for
    each length.

    Returns:
        A newline-joined string with one "<short hash> <subject> (<author>)" entry per
        matching commit, or an empty string when nothing matches.

    Raises:
        RuntimeError: propagated from run_git if the git log command fails.
    """
    longest, shortest = 12, 6

    # -z terminates each commit record with NUL, so the multi-line body (%B) can be
    # carried along safely: line 1 is the full hash, line 2 the summary, the rest the body.
    raw = run_git(repo, [
        'log', upstream_ref, '--grep', f'Fixes: {hash_[:shortest]}', '-i', '-z',
        '--format=%H%n%h %s (%an)%n%B'
    ])

    candidates = []
    for record in raw.split('\x00'):
        record = record.lstrip('\n')
        if not record.strip():
            continue
        full_hash, _, rest = record.partition('\n')
        summary, _, body = rest.partition('\n')
        # Collapse whitespace in the summary the same way split()/join() would.
        candidates.append((full_hash.strip(), ' '.join(summary.split()), body.lower()))

    seen_commits = set()
    results = []
    for length in range(longest, shortest - 1, -1):  # 12 down to 6
        needle = f'fixes: {hash_[:length]}'.lower()
        for full_hash, summary, body in candidates:
            if full_hash not in seen_commits and needle in body:
                seen_commits.add(full_hash)
                results.append(summary)

    # Safety net: never drop a commit git itself matched, even if the in-Python
    # substring check disagrees with git's matcher for some unusual message.
    for full_hash, summary, _ in candidates:
        if full_hash not in seen_commits:
            seen_commits.add(full_hash)
            results.append(summary)

    return "\n".join(results)
68+
69+
def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''):
    """Re-flow ``text`` into lines of at most ``width`` columns and return them joined.

    ``initial_indent`` prefixes the first output line and ``subsequent_indent``
    every following one. Words are never split, neither when they are longer
    than ``width`` nor at hyphens, so such a line may exceed ``width``.
    """
    options = {
        'width': width,
        'initial_indent': initial_indent,
        'subsequent_indent': subsequent_indent,
        'break_long_words': False,
        'break_on_hyphens': False,
    }
    return textwrap.fill(text, **options)
77+
78+
# Matches a commit-message line of the form "commit <12-40 hex digits>".
_UPSTREAM_COMMIT_RE = re.compile(r'^commit\s+([0-9a-fA-F]{12,40})', re.MULTILINE)


def _wrap_tagged(tag, text):
    """Wrap '<tag><text>' at 80 columns with continuation lines aligned under the text."""
    # The hanging indent is derived from the tag so it always equals the tag's width
    # (11 spaces for '[NOTFOUND] ', 8 spaces for '[FIXES] ').
    return wrap_paragraph(tag + text, width=80, initial_indent='',
                          subsequent_indent=' ' * len(tag))


def _describe_missing(pr_commit_desc, short_uhash, markdown):
    """Build the output lines for an upstream reference that is not in the upstream ref."""
    if markdown:
        return [
            f"- ❗ PR commit `{pr_commit_desc}` references upstream commit \n"
            f" `{short_uhash}` which does **not** exist in the upstream Linux kernel.\n"
        ]
    text = (f"PR commit {pr_commit_desc} references upstream commit "
            f"{short_uhash}, which does not exist in kernel-mainline.")
    return [_wrap_tagged('[NOTFOUND] ', text), ""]  # trailing blank line


def _describe_fixes(pr_commit_desc, short_uhash, fixes, markdown):
    """Build the output lines for an upstream reference that later commits mark as fixed."""
    if markdown:
        fixes_block = " " + fixes.replace("\n", "\n ")
        return [
            f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n"
            f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n"
            f" Linux kernel:\n\n"
            f"```text\n{fixes_block}\n```\n"
        ]
    text = (f"PR commit {pr_commit_desc} references upstream commit "
            f"{short_uhash}, which has Fixes tags:")
    lines = [_wrap_tagged('[FIXES] ', text), ""]  # blank line after 'Fixes tags:'
    lines.extend(' ' + line for line in fixes.splitlines())
    lines.append("")  # blank line
    return lines


def main():
    """Command-line entry point: check upstream references in a PR branch's commits.

    Parses ``repo``, ``pr_branch``, ``base_branch`` and the optional ``--markdown`` flag,
    then for every commit in ``base_branch..pr_branch`` (oldest first) looks for
    ``commit <hash>`` lines in the commit message. Each referenced hash is checked for
    reachability from ``origin/kernel-mainline`` and for ``Fixes:`` tags naming it there.
    Findings are printed to stdout, either as plain text or as Markdown.

    Exits with status 1 if the upstream ref, PR branch or base branch does not exist,
    and with status 0 if the PR branch has no commits beyond the base branch.
    """
    parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.")
    parser.add_argument("repo", help="Path to the git repo")
    parser.add_argument("pr_branch", help="Name of the PR branch")
    parser.add_argument("base_branch", help="Name of the base branch")
    parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments")
    args = parser.parse_args()

    upstream_ref = 'origin/kernel-mainline'

    # Validate that all required refs exist before continuing
    required_refs = [('upstream reference', upstream_ref),
                     ('PR branch', args.pr_branch),
                     ('base branch', args.base_branch)]
    missing_refs = [(refname, refval) for refname, refval in required_refs
                    if not ref_exists(args.repo, refval)]
    if missing_refs:
        for refname, refval in missing_refs:
            print(f"ERROR: The {refname} '{refval}' does not exist in the given repo.")
        print("Please fetch or create the required references before running this script.")
        sys.exit(1)

    pr_commits = get_pr_commits(args.repo, args.pr_branch, args.base_branch)
    if not pr_commits:
        if args.markdown:
            print("> ℹ️ **No commits found in PR branch that are not in base branch.**")
        else:
            print("No commits found in PR branch that are not in base branch.")
        sys.exit(0)

    out_lines = []

    for sha in reversed(pr_commits):  # oldest first
        short_hash, subject = get_short_hash_and_subject(args.repo, sha)
        pr_commit_desc = f"{short_hash} ({subject})"
        msg = get_commit_message(args.repo, sha)
        # dict.fromkeys de-duplicates while keeping first-seen order, so a hash that
        # is quoted twice in one message is only reported once.
        upstream_hashes = dict.fromkeys(_UPSTREAM_COMMIT_RE.findall(msg))
        for uhash in upstream_hashes:
            short_uhash = uhash[:12]
            # Ensure the referenced commit in the PR actually exists in the upstream ref.
            if not hash_exists_in_mainline(args.repo, upstream_ref, uhash):
                out_lines.extend(_describe_missing(pr_commit_desc, short_uhash, args.markdown))
                continue
            fixes = find_fixes_in_mainline(args.repo, upstream_ref, uhash)
            if fixes:
                out_lines.extend(_describe_fixes(pr_commit_desc, short_uhash, fixes, args.markdown))

    # Every finding contributes at least one line, so a non-empty list means findings.
    if out_lines:
        if args.markdown:
            print("## :mag: Upstream Linux Kernel Commit Check\n")
            print('\n'.join(out_lines))
            print("*This is an automated message from the kernel commit checker workflow.*")
        else:
            print('\n'.join(out_lines))
    else:
        if args.markdown:
            print("> ✅ **All referenced commits exist upstream and have no Fixes: tags.**")
        else:
            print("All referenced commits exist upstream and have no Fixes: tags.")
172+
173+
# Run the checker only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)