Skip to content

Commit c87fea4

Browse files
committed
Add script to validate upstream references in PR branch commits
This script scans each commit in a PR branch for upstream Linux kernel commit references and validates those references. If a commit references an upstream commit, the script checks that the referenced commit exists in mainline and reports whether any commit in the upstream kernel names it in a Fixes: tag. Usage: python3 check_kernel_commits.py <repo_path> <pr_branch> <base_branch> [--markdown]. By default, the script formats its results for terminal display; use the --markdown flag to format them for GitHub PR comments.
1 parent ab098bb commit c87fea4

File tree

1 file changed

+175
-0
lines changed

1 file changed

+175
-0
lines changed

check_kernel_commits.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import subprocess
5+
import re
6+
import sys
7+
import textwrap
8+
9+
def run_git(repo, args):
    """Run a git command in the given repository and return its output as a string.

    Args:
        repo: Path to the repository; handed to git through ``-C``.
        args: List of git arguments, subcommand first
            (for example ``['rev-list', 'base..topic']``).

    Returns:
        The command's standard output as text.

    Raises:
        RuntimeError: If git exits with a non-zero status. The message holds
            the arguments and whatever git wrote to stderr.
    """
    # Commit messages and author names are arbitrary bytes to git. Decode as
    # UTF-8 and substitute anything undecodable, so that one legacy-encoded
    # commit cannot abort the whole run with UnicodeDecodeError.
    result = subprocess.run(
        ['git', '-C', repo] + args,
        text=True,
        encoding='utf-8',
        errors='replace',
        capture_output=True,
        check=False,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Git command failed: {' '.join(args)}\n{result.stderr}")
    return result.stdout
15+
16+
def ref_exists(repo, ref):
    """Tell whether ``ref`` resolves to an object in the repository at ``repo``.

    Runs ``git rev-parse --verify --quiet`` with all output discarded and
    reports success (exit status 0) as True, anything else as False.
    """
    command = ['git', '-C', repo, 'rev-parse', '--verify', '--quiet', ref]
    # Only the exit status matters here, so both output streams are dropped.
    completed = subprocess.run(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return completed.returncode == 0
24+
25+
def get_pr_commits(repo, pr_branch, base_branch):
    """Return the SHAs of commits reachable from pr_branch but not from base_branch.

    The SHAs come back as a list of strings, in the order ``git rev-list``
    prints them; the list is empty when the range contains no commits.
    """
    revision_range = f'{base_branch}..{pr_branch}'
    listing = run_git(repo, ['rev-list', revision_range])
    return listing.strip().splitlines()
29+
30+
def get_commit_message(repo, sha):
    """Return the raw message (git's ``%B`` placeholder) of the commit ``sha``."""
    log_args = ['log', '-n', '1', '--format=%B', sha]
    message = run_git(repo, log_args)
    return message
33+
34+
def get_short_hash_and_subject(repo, sha):
    """Return ``(abbreviated hash, subject line)`` for the commit ``sha``."""
    # git separates the two fields with a NUL byte (%x00), which is then used
    # as the split point, so spaces in the subject are left alone.
    log_args = ['log', '-n', '1', '--format=%h%x00%s', sha]
    raw = run_git(repo, log_args).strip()
    short_hash, subject = raw.split('\x00', 1)
    return short_hash, subject
39+
40+
def hash_exists_in_mainline(repo, upstream_ref, hash_):
    """
    Report whether hash_ is an ancestor of (i.e. reachable from) upstream_ref.

    Any failure of ``git merge-base --is-ancestor`` is reported as False.
    """
    ancestry_check = ['merge-base', '--is-ancestor', hash_, upstream_ref]
    try:
        run_git(repo, ancestry_check)
    except RuntimeError:
        # run_git raises on every non-zero exit status.
        return False
    return True
49+
50+
def find_fixes_in_mainline(repo, upstream_ref, hash_):
    """
    Return the commits in upstream_ref whose message has a Fixes: tag for hash_.

    Fixes: tags normally abbreviate the hash they point at. A tag counts as a
    reference to hash_ when it is at least 6 hex digits long (or as long as
    hash_, if that is shorter) and agrees with hash_ on every character the
    two have in common. Matching is case-insensitive.

    Args:
        repo: Path to the git repository.
        upstream_ref: Ref whose history is searched.
        hash_: Full or abbreviated hash of the commit being looked up.

    Returns:
        A newline-joined string with one "<abbrev hash> <subject> (<author>)"
        line per fixing commit, or "" when there is none. Commits whose tag
        agrees on 12 or more characters come first, then 11, and so on down
        to 6; commits with the same match length keep git log order.

    Raises:
        RuntimeError: Propagated from run_git when git log fails.
    """
    target = hash_.lower()
    min_tag_len = min(6, len(target))
    tag_pattern = re.compile(r'Fixes: ([0-9a-f]+)', re.IGNORECASE)

    # One history walk is enough: a message containing "Fixes: <12 chars>"
    # necessarily contains "Fixes: <6 chars>" too, so the shortest prefix
    # yields every candidate the longer prefixes would. The body (%B) is
    # fetched as well so each candidate can be verified below; fields are
    # NUL-separated and records end with the ASCII record separator (0x1e).
    output = run_git(repo, [
        'log', upstream_ref, '--grep', f'Fixes: {hash_[:6]}', '-i',
        '--format=%h %s (%an)%x00%B%x1e'
    ])

    matches = []
    for record in output.split('\x1e'):
        record = record.strip()
        if not record:
            continue
        summary, body = record.split('\x00', 1)

        # A 6-character grep hit may belong to a tag that names a *different*
        # commit sharing only that prefix. Keep the candidate only if one of
        # its tags agrees with the target over their whole common length.
        best = 0
        for tag_match in tag_pattern.finditer(body):
            tag = tag_match.group(1).lower()
            common = min(len(tag), len(target))
            if len(tag) >= min_tag_len and tag[:common] == target[:common]:
                best = max(best, min(common, 12))
        if best:
            matches.append((best, ' '.join(summary.split())))

    # Stable sort: longest match first, git log order within a match length.
    matches.sort(key=lambda item: -item[0])
    return "\n".join(summary for _, summary in matches)
69+
70+
def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''):
    """Re-flow ``text`` into lines of at most ``width`` columns.

    ``initial_indent`` prefixes the first line and ``subsequent_indent`` every
    following one. Words are never split, neither when they are longer than
    the width nor at hyphens, so such a word may overflow its line.
    """
    layout = {
        'initial_indent': initial_indent,
        'subsequent_indent': subsequent_indent,
        'break_long_words': False,
        'break_on_hyphens': False,
    }
    return textwrap.fill(text, width=width, **layout)
78+
79+
def main():
    """Check every commit of a PR branch for upstream kernel commit references.

    Parses the command line (repo path, PR branch, base branch and the
    optional --markdown switch), verifies that the PR branch, the base branch
    and 'origin/kernel-mainline' all resolve in the repo, and then walks the
    commits that are on the PR branch but not on the base branch, oldest
    first. Every message line of the form "commit <12-40 hex digits>" is
    treated as an upstream reference and is reported when

    * the referenced commit is not an ancestor of the upstream ref, or
    * commits in the upstream ref carry a Fixes: tag pointing at it.

    The report goes to stdout, as plain text or as Markdown. The process
    exits with status 1 when a required ref is missing; findings themselves
    do not change the exit status.
    """
    parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.")
    parser.add_argument("repo", help="Path to the git repo")
    parser.add_argument("pr_branch", help="Name of the PR branch")
    parser.add_argument("base_branch", help="Name of the base branch")
    parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments")
    args = parser.parse_args()

    upstream_ref = 'origin/kernel-mainline'

    # Validate that all required refs exist before continuing
    required_refs = [('upstream reference', upstream_ref),
                     ('PR branch', args.pr_branch),
                     ('base branch', args.base_branch)]
    missing_refs = [(refname, refval) for refname, refval in required_refs
                    if not ref_exists(args.repo, refval)]
    if missing_refs:
        for refname, refval in missing_refs:
            print(f"ERROR: The {refname} '{refval}' does not exist in the given repo.")
        print("Please fetch or create the required references before running this script.")
        sys.exit(1)

    pr_commits = get_pr_commits(args.repo, args.pr_branch, args.base_branch)
    if not pr_commits:
        if args.markdown:
            print("> ℹ️ **No commits found in PR branch that are not in base branch.**")
        else:
            print("No commits found in PR branch that are not in base branch.")
        sys.exit(0)

    any_findings = False
    out_lines = []

    for sha in reversed(pr_commits):  # oldest first
        short_hash, subject = get_short_hash_and_subject(args.repo, sha)
        pr_commit_desc = f"{short_hash} ({subject})"
        msg = get_commit_message(args.repo, sha)
        upstream_hashes = re.findall(r'^commit\s+([0-9a-fA-F]{12,40})', msg, re.MULTILINE)
        # A message may name the same upstream commit more than once; check
        # and report each hash a single time, in order of first appearance.
        for uhash in dict.fromkeys(upstream_hashes):
            short_uhash = uhash[:12]
            # Ensure the referenced commit in the PR actually exists in the upstream ref.
            exists = hash_exists_in_mainline(args.repo, upstream_ref, uhash)
            if not exists:
                any_findings = True
                if args.markdown:
                    out_lines.append(
                        f"- ❗ PR commit `{pr_commit_desc}` references upstream commit \n"
                        f" `{short_uhash}` which does **not** exist in the upstream Linux kernel.\n"
                    )
                else:
                    header = (f"[NOTFOUND] PR commit {pr_commit_desc} references upstream commit "
                              f"{short_uhash}, which does not exist in kernel-mainline.")
                    out_lines.append(
                        # Continuation lines are indented by the width of the
                        # tag (11 spaces) so the text lines up after it.
                        wrap_paragraph(header, width=80, initial_indent='',
                                       subsequent_indent=' ' * len('[NOTFOUND] '))
                    )
                    out_lines.append("")  # blank line
                continue
            fixes = find_fixes_in_mainline(args.repo, upstream_ref, uhash)
            if fixes:
                any_findings = True
                if args.markdown:
                    fixes_block = " " + fixes.replace("\n", "\n ")
                    out_lines.append(
                        f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n"
                        f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n"
                        f" Linux kernel:\n\n"
                        f"```text\n{fixes_block}\n```\n"
                    )
                else:
                    header = (f"[FIXES] PR commit {pr_commit_desc} references upstream commit "
                              f"{short_uhash}, which has Fixes tags:")
                    out_lines.append(
                        # Continuation lines are indented by the width of the
                        # tag (8 spaces) so the text lines up after it.
                        wrap_paragraph(header, width=80, initial_indent='',
                                       subsequent_indent=' ' * len('[FIXES] '))
                    )
                    out_lines.append("")  # blank line after 'Fixes tags:'
                    for line in fixes.splitlines():
                        out_lines.append(' ' + line)
                    out_lines.append("")  # blank line

    if any_findings:
        if args.markdown:
            print("## :mag: Upstream Linux Kernel Commit Check\n")
            print('\n'.join(out_lines))
            print("*This is an automated message from the kernel commit checker workflow.*")
        else:
            print('\n'.join(out_lines))
    else:
        if args.markdown:
            print("> ✅ **All referenced commits exist upstream and have no Fixes: tags.**")
        else:
            print("All referenced commits exist upstream and have no Fixes: tags.")
173+
174+
if __name__ == "__main__":
    # main() returns None, so this exits with status 0 unless main() itself
    # called sys.exit() or raised.
    raise SystemExit(main())

0 commit comments

Comments
 (0)