Skip to content

Commit 274638f

Browse files
committed
Merge remote-tracking branch 'origin/master' into bal-devnet-2-bals-only
2 parents 58ab5a7 + 8bf9d47 commit 274638f

File tree

293 files changed

+21735
-973
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

293 files changed

+21735
-973
lines changed
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
# SPDX-FileCopyrightText: 2026 Demerzel Solutions Limited
2+
# SPDX-License-Identifier: LGPL-3.0-only
3+
4+
"""Shared utilities for EVM opcode benchmark comparison.
5+
6+
Used by the detect-noisy and compare steps in evm-opcode-benchmark-diff.yml.
7+
"""
8+
9+
import glob
10+
import os
11+
import re
12+
import statistics
13+
14+
# Matches ANSI terminal escape sequences (e.g. color codes) so BDN console
# output can be cleaned before parsing.
ANSI_RE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]")
# Matches a BenchmarkDotNet timing cell: a number (optional thousands commas
# and decimal part) followed by a unit, e.g. "1,234.56 ns".
VALUE_RE = re.compile(r"^\s*([0-9][0-9,]*(?:\.[0-9]+)?)\s*([a-zA-Zµμ]+)\s*$")
# Conversion factors from BDN time units to nanoseconds. Both the micro sign
# (U+00B5) and the Greek small mu (U+03BC) are accepted for microseconds,
# since different tools emit either character.
UNIT_TO_NS = {
    "ns": 1.0,
    "us": 1_000.0,
    "µs": 1_000.0,
    "μs": 1_000.0,
    "ms": 1_000_000.0,
    "s": 1_000_000_000.0,
}
24+
25+
26+
def read_env_config():
    """Read benchmark comparison thresholds from environment variables.

    Returns:
        dict: One float per setting; each value falls back to a built-in
        default when the corresponding environment variable is unset.
    """
    # (config key, environment variable, default) triples.
    settings = (
        ("default_threshold", "THRESHOLD_PERCENT", "5"),
        ("noise_multiplier", "NOISE_MULTIPLIER", "2.0"),
        ("error_multiplier", "ERROR_MULTIPLIER", "1.0"),
        ("abs_delta_ns_floor", "ABS_DELTA_NS_FLOOR", "2.0"),
        ("delta_margin_percent", "DELTA_MARGIN_PERCENT", "2.0"),
    )
    return {key: float(os.environ.get(var, default)) for key, var, default in settings}
35+
36+
37+
def collect_logs(base_pattern="evm-opcodes-base*.log", pr_pattern="evm-opcodes-pr*.log"):
    """Collect and sort benchmark log files, with fallback defaults.

    Globs for base and PR benchmark logs; when a pattern matches nothing,
    the conventional single-file name is returned instead so downstream
    code always has at least one path to try.

    Returns:
        tuple[list[str], list[str]]: (base log paths, PR log paths), sorted.
    """
    base_logs = sorted(glob.glob(base_pattern)) or ["evm-opcodes-base.log"]
    pr_logs = sorted(glob.glob(pr_pattern)) or ["evm-opcodes-pr.log"]
    return base_logs, pr_logs
46+
47+
48+
def normalize_text(text):
    """Strip ANSI escape codes and non-breaking spaces.

    Non-breaking spaces (U+00A0) are replaced with plain spaces first, then
    any ANSI escape sequences are removed, so the table parser sees clean
    ASCII-style whitespace and delimiters.
    """
    cleaned = text.replace("\xa0", " ")
    return ANSI_RE.sub("", cleaned)
52+
53+
54+
def parse_ns(value):
    """Parse a BenchmarkDotNet timing value (e.g. '12.34 ns') to nanoseconds.

    Returns:
        float | None: The value converted to nanoseconds, or None when the
        cell is not a recognizable "<number> <unit>" timing value or uses
        an unknown unit.
    """
    match = VALUE_RE.match(value.strip())
    if match is None:
        return None
    magnitude, unit = match.group(1), match.group(2)
    scale = UNIT_TO_NS.get(unit)
    if scale is None:
        return None
    # Thousands separators ("1,234.5") are stripped before conversion.
    return float(magnitude.replace(",", "")) * scale
65+
66+
67+
def cv_percent(mean, stddev):
    """Compute the coefficient of variation (stddev / mean) as a percentage.

    Returns:
        float | None: CV in percent, or None when either input is missing
        or the mean is not strictly positive (the ratio would be undefined
        or meaningless for timings).
    """
    if mean is None or stddev is None:
        return None
    if mean <= 0:
        return None
    ratio = stddev / mean
    return ratio * 100.0
72+
73+
74+
def fmt_cv(mean, stddev):
    """Format the coefficient of variation for display (e.g. '5.0%').

    Returns:
        str: CV as a one-decimal percentage, or "N/A" when it cannot be
        computed.

    Fix: the guard previously only rejected mean == 0, while the sibling
    cv_percent rejects mean <= 0; a negative mean (never a valid timing)
    would have been formatted as a nonsense negative percentage. The two
    guards now agree.
    """
    if mean is None or stddev is None or mean <= 0:
        return "N/A"
    cv = (stddev / mean) * 100
    return f"{cv:.1f}%"
80+
81+
82+
def uncertainty_floor_percent(base_val, base_error, pr_error, error_multiplier):
    """Compute uncertainty floor from BDN Error columns as a percentage.

    The floor is the combined base + PR error expressed as a percentage of
    the base value, scaled by *error_multiplier*.

    Returns:
        float | None: The floor in percent; None when the base value is
        unusable (missing or non-positive) or when neither side reports an
        error value.
    """
    if base_val is None or base_val <= 0:
        return None
    if base_error is None and pr_error is None:
        return None
    # A missing error on one side contributes zero rather than aborting.
    be = base_error or 0.0
    pe = pr_error or 0.0
    return ((be + pe) / base_val) * 100.0 * error_multiplier
91+
92+
93+
def find_col(headers, name):
    """Find column index by name, or None if missing."""
    try:
        return headers.index(name)
    except ValueError:
        return None
96+
97+
98+
def pick_median(values):
    """Return the median of non-None values, or None if empty."""
    present = [value for value in values if value is not None]
    return statistics.median(present) if present else None
104+
105+
106+
def extract_opcode_data(path):
    """Extract opcode stats (median, mean, error, stddev, threshold) from a BDN log file.

    Parses the markdown-style results table in a BenchmarkDotNet console log.
    Timing columns are converted to nanoseconds via parse_ns; Threshold is
    read as a bare float. Returns a dict mapping opcode name to a dict with
    keys "median", "mean", "error", "stddev", "threshold" (each possibly
    None), or {} when no usable table is found.
    """
    # errors="replace" keeps parsing alive on stray non-UTF-8 bytes.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        text = normalize_text(f.read())

    lines = text.splitlines()
    header_idx = -1
    # Find the results-table header row. Note there is no break: if several
    # matching tables appear in the log, the LAST one wins — presumably the
    # final summary table; confirm against actual BDN output if this changes.
    for i, line in enumerate(lines):
        if line.strip().startswith("|") and "Opcode" in line and "Mean" in line:
            header_idx = i

    if header_idx < 0:
        return {}

    # Split the "|"-delimited header into trimmed column names.
    headers = [c.strip() for c in lines[header_idx].strip().strip("|").split("|")]
    opcode_col = find_col(headers, "Opcode")
    median_col = find_col(headers, "Median")
    mean_col = find_col(headers, "Mean")
    error_col = find_col(headers, "Error")
    stddev_col = find_col(headers, "StdDev")
    threshold_col = find_col(headers, "Threshold")

    # Opcode and Mean are mandatory; the other columns are optional.
    if opcode_col is None or mean_col is None:
        return {}

    data = {}
    # +2 skips the |---|---| separator row directly under the header.
    i = header_idx + 2
    while i < len(lines):
        line = lines[i].strip()
        if not line.startswith("|"):
            # The table ends at the first non-pipe line.
            break

        cells = [c.strip() for c in line.strip("|").split("|")]
        if len(cells) <= max(opcode_col, mean_col):
            # Short/malformed row: skip it rather than abort the table.
            i += 1
            continue

        opcode = cells[opcode_col]
        mean = parse_ns(cells[mean_col])
        # Rows with an empty opcode or unparseable Mean are ignored.
        if opcode and mean is not None:
            median = parse_ns(cells[median_col]) if median_col is not None and len(cells) > median_col else None
            error = parse_ns(cells[error_col]) if error_col is not None and len(cells) > error_col else None
            stddev = parse_ns(cells[stddev_col]) if stddev_col is not None and len(cells) > stddev_col else None
            threshold = None
            if threshold_col is not None and len(cells) > threshold_col:
                try:
                    threshold = float(cells[threshold_col])
                except (ValueError, IndexError):
                    # Non-numeric Threshold cell: leave it as None.
                    pass
            data[opcode] = {"median": median, "mean": mean, "error": error, "stddev": stddev, "threshold": threshold}
        i += 1

    return data
159+
160+
161+
def aggregate(log_paths):
    """Aggregate opcode data across multiple benchmark log files using median.

    Each log is parsed independently; for every opcode seen in any run, each
    statistic is reduced to the median of the values reported by the runs
    that contain that opcode (None entries are ignored by pick_median).

    Returns:
        dict: opcode -> {"median", "mean", "error", "stddev", "threshold"}.
    """
    per_run = [extract_opcode_data(path) for path in log_paths]
    seen = set()
    for run in per_run:
        seen.update(run)

    stat_keys = ("median", "mean", "error", "stddev", "threshold")
    aggregated = {}
    for opcode in sorted(seen):
        samples = [run[opcode] for run in per_run if opcode in run]
        aggregated[opcode] = {
            key: pick_median([sample.get(key) for sample in samples])
            for key in stat_keys
        }
    return aggregated
176+
177+
178+
def compare_opcodes(base_data, pr_data, config):
    """Compare base vs PR opcode data and return per-opcode comparison results.

    Returns a list of (opcode, info) tuples for every opcode. Each info dict contains:
        base_val, pr_val, delta_pct, delta_abs_ns,
        base_mean, pr_mean, base_error, pr_error, base_stddev, pr_stddev,
        threshold, noise_floor, uncertainty_floor, effective_threshold,
        is_flagged, is_noisy
    """
    results = []
    # Union of both sides so opcodes added or removed by the PR are reported.
    for opcode in sorted(set(base_data.keys()) | set(pr_data.keys())):
        b = base_data.get(opcode)
        p = pr_data.get(opcode)
        # Headline value: prefer Median, fall back to Mean. NOTE(review): a
        # literal 0.0 median also falls through to the mean (`or` is truthy-
        # based) — harmless for real timings, but worth knowing.
        base_val = (b.get("median") or b.get("mean")) if b else None
        pr_val = (p.get("median") or p.get("mean")) if p else None
        base_mean = b["mean"] if b else None
        pr_mean = p["mean"] if p else None
        base_error = b.get("error") if b else None
        pr_error = p.get("error") if p else None
        base_stddev = b["stddev"] if b else None
        pr_stddev = p["stddev"] if p else None
        # Per-opcode Threshold column wins; otherwise the global default.
        threshold = (b or p or {}).get("threshold") or config["default_threshold"]

        base_cv_pct = cv_percent(base_mean, base_stddev)
        pr_cv_pct = cv_percent(pr_mean, pr_stddev)
        cv_values = [v for v in (base_cv_pct, pr_cv_pct) if v is not None]
        # Noise floor: the worse CV of the two sides, scaled by the
        # configured multiplier; 0.0 when no CV could be computed.
        noise_floor = (max(cv_values) * config["noise_multiplier"]) if cv_values else 0.0
        uf = uncertainty_floor_percent(base_val, base_error, pr_error, config["error_multiplier"]) or 0.0
        # A delta must clear the largest of the three floors to matter.
        effective_threshold = max(threshold, noise_floor, uf)

        delta_pct = None
        delta_abs_ns = None
        is_flagged = False
        # "Noisy" = the measurement variance alone exceeds the threshold.
        is_noisy = noise_floor > threshold or uf > threshold

        if base_val is None or pr_val is None:
            # New or removed opcode
            is_flagged = True
        elif base_val == 0:
            # A zero baseline makes percent-delta undefined; flag any change.
            is_flagged = pr_val != 0
        else:
            delta_pct = ((pr_val - base_val) / base_val) * 100.0
            delta_abs_ns = abs(pr_val - base_val)
            # Flag only when BOTH the relative delta (with safety margin)
            # and the absolute delta clear their respective floors.
            is_flagged = (
                abs(delta_pct) >= (effective_threshold + config["delta_margin_percent"])
                and delta_abs_ns >= config["abs_delta_ns_floor"]
            )

        results.append((opcode, {
            "base_val": base_val,
            "pr_val": pr_val,
            "delta_pct": delta_pct,
            "delta_abs_ns": delta_abs_ns,
            "base_mean": base_mean,
            "pr_mean": pr_mean,
            "base_error": base_error,
            "pr_error": pr_error,
            "base_stddev": base_stddev,
            "pr_stddev": pr_stddev,
            "threshold": threshold,
            "noise_floor": noise_floor,
            "uncertainty_floor": uf,
            "effective_threshold": effective_threshold,
            "is_flagged": is_flagged,
            "is_noisy": is_noisy,
        }))
    return results

.github/workflows/build-stateless-exec.yml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ on:
88
push:
99
branches: [master]
1010
workflow_dispatch:
11+
inputs:
12+
build_on_remote:
13+
description: Build on remote RISC-V host
14+
type: boolean
15+
required: false
16+
default: false
1117

1218
jobs:
1319
build-zisk-sim:
@@ -38,7 +44,7 @@ jobs:
3844
working-directory: tools/StatelessExecution
3945
run: make build-tool-local
4046

41-
- name: Build Zisk Simulator binary
47+
- name: Build Zisk simulator binary
4248
working-directory: tools/StatelessExecution
4349
run: make build-zisk-sim
4450

@@ -55,14 +61,14 @@ jobs:
5561
fi
5662
echo "::notice::Program executed successfully with exit code 0"
5763
58-
- name: Execute Zisk Sim binary on on RISC-V host
64+
- name: Execute Zisk simulator binary on RISC-V host
65+
if: github.event.inputs.build_on_remote == 'true'
5966
working-directory: tools/StatelessExecution
6067
env:
6168
STATELESS_EXECUTOR_RISCV_HOST: ${{ secrets.STATELESS_EXECUTOR_RISCV_HOST }}
6269
STATELESS_EXECUTOR_RISCV_USERNAME: ${{ secrets.STATELESS_EXECUTOR_RISCV_USERNAME }}
6370
STATELESS_EXECUTOR_RISCV_SSH_PRIVATE_KEY: ${{ secrets.STATELESS_EXECUTOR_RISCV_SSH_PRIVATE_KEY }}
64-
run: |
65-
./stateless_exec_test.sh
71+
run: ./stateless_exec_test.sh
6672

6773
- name: Upload Zisk simulator binary
6874
uses: actions/upload-artifact@v4

0 commit comments

Comments
 (0)