# Claude NL/T Full Suite (Unity live) #7
# Workflow header: manually triggered job that drives the NL/T suite against a Unity editor container.
name: Claude NL/T Full Suite (Unity live)
# Manual trigger only.
on: [workflow_dispatch]
# Token scopes: repository contents read-only, checks writable.
permissions:
contents: read
checks: write
# Runs are grouped per workflow + ref; a newer run cancels the one in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
# Unity editor image used by the docker run steps below.
UNITY_IMAGE: unityci/editor:ubuntu-2021.3.45f1-linux-il2cpp-3
jobs:
nl-suite:
runs-on: ubuntu-latest
timeout-minutes: 60
env:
# Paths of the merged JUnit report and its markdown rendering.
JUNIT_OUT: reports/junit-nl-suite.xml
MD_OUT: reports/junit-nl-suite.md
steps:
# ---------- Secrets check ----------
# Publishes two step outputs consumed by later `if:` conditions:
#   anthropic_ok - true when ANTHROPIC_API_KEY is non-empty
#   unity_ok     - true when UNITY_LICENSE is set, or both UNITY_EMAIL and UNITY_PASSWORD are set
- name: Detect secrets (outputs)
  id: detect
  env:
    UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }}
    UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
    UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
    UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  run: |
    set -e
    anthropic_ok=false
    unity_ok=false
    if [ -n "$ANTHROPIC_API_KEY" ]; then
      anthropic_ok=true
    fi
    if [ -n "$UNITY_LICENSE" ]; then
      unity_ok=true
    elif [ -n "$UNITY_EMAIL" ] && [ -n "$UNITY_PASSWORD" ]; then
      unity_ok=true
    fi
    echo "anthropic_ok=$anthropic_ok" >> "$GITHUB_OUTPUT"
    echo "unity_ok=$unity_ok" >> "$GITHUB_OUTPUT"
# Check out the repository with full history (fetch-depth: 0).
- uses: actions/checkout@v4
with:
fetch-depth: 0
# ---------- Python env for MCP server (uv) ----------
# Install uv, requesting Python 3.11 (quoted so YAML keeps it a string).
- uses: astral-sh/setup-uv@v4
with:
python-version: '3.11'
# Creates a uv virtualenv, exports it to later steps, and installs the MCP server's
# Python dependencies from the first manifest that exists.
- name: Install MCP server
run: |
set -eux
uv venv
# Make the venv visible to all following steps (VIRTUAL_ENV variable + PATH entry).
echo "VIRTUAL_ENV=$GITHUB_WORKSPACE/.venv" >> "$GITHUB_ENV"
echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"
# Probe order: src/pyproject.toml, src/requirements.txt, then the same two files one directory up.
if [ -f UnityMcpBridge/UnityMcpServer~/src/pyproject.toml ]; then
uv pip install -e UnityMcpBridge/UnityMcpServer~/src
elif [ -f UnityMcpBridge/UnityMcpServer~/src/requirements.txt ]; then
uv pip install -r UnityMcpBridge/UnityMcpServer~/src/requirements.txt
elif [ -f UnityMcpBridge/UnityMcpServer~/pyproject.toml ]; then
uv pip install -e UnityMcpBridge/UnityMcpServer~/
elif [ -f UnityMcpBridge/UnityMcpServer~/requirements.txt ]; then
uv pip install -r UnityMcpBridge/UnityMcpServer~/requirements.txt
else
# No manifest is not an error; the step succeeds without installing anything.
echo "No MCP Python deps found (skipping)"
fi
# --- Licensing: allow both ULF and EBL when available ---
# Step outputs:
#   use_ulf    - UNITY_LICENSE is non-empty
#   use_ebl    - both UNITY_EMAIL and UNITY_PASSWORD are non-empty
#   has_serial - UNITY_SERIAL is non-empty
- name: Decide license sources
id: lic
shell: bash
env:
UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }}
UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}
run: |
set -eu
use_ulf=false; use_ebl=false
# A failing test on the left of && does not trip `set -e`; the flag simply stays false.
[[ -n "${UNITY_LICENSE:-}" ]] && use_ulf=true
[[ -n "${UNITY_EMAIL:-}" && -n "${UNITY_PASSWORD:-}" ]] && use_ebl=true
echo "use_ulf=$use_ulf" >> "$GITHUB_OUTPUT"
echo "use_ebl=$use_ebl" >> "$GITHUB_OUTPUT"
echo "has_serial=$([[ -n "${UNITY_SERIAL:-}" ]] && echo true || echo false)" >> "$GITHUB_OUTPUT"
# Writes the UNITY_LICENSE secret to $RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf and
# reports through the `ok` output whether it can be used as a manual license file.
#   ok=true  - the file contains a <Signature> element; a copy is placed in unity-local/Unity/
#   ok=false - the content is an XML document (moved to the entitlement location) or unrecognized
- name: Stage Unity .ulf license (from secret)
  if: steps.lic.outputs.use_ulf == 'true'
  id: ulf
  env:
    UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }}
  shell: bash
  run: |
    set -eu
    mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-local/Unity"
    f="$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf"
    # The secret may be base64-encoded or raw text: decode when it decodes cleanly, else write as-is.
    if printf "%s" "$UNITY_LICENSE" | base64 -d - >/dev/null 2>&1; then
      printf "%s" "$UNITY_LICENSE" | base64 -d - > "$f"
    else
      printf "%s" "$UNITY_LICENSE" > "$f"
    fi
    chmod 600 "$f" || true
    # If someone pasted an entitlement XML into UNITY_LICENSE by mistake, re-home it.
    # -F matches the literal text "<?xml". The earlier pattern '<\?xml' is a GNU basic
    # regex in which "\?" makes the "<" optional, so it fired on any "xml" in the
    # first 100 bytes.
    # NOTE(review): if genuine .ulf files can also start with an XML declaration they
    # will still be routed to this branch - confirm against a real license file.
    if head -c 100 "$f" | grep -qiF '<?xml'; then
      mkdir -p "$RUNNER_TEMP/unity-config/Unity/licenses"
      mv "$f" "$RUNNER_TEMP/unity-config/Unity/licenses/UnityEntitlementLicense.xml"
      echo "ok=false" >> "$GITHUB_OUTPUT"
    elif grep -qi '<Signature>' "$f"; then
      # provide it in the standard local-share path too
      cp -f "$f" "$RUNNER_TEMP/unity-local/Unity/Unity_lic.ulf"
      echo "ok=true" >> "$GITHUB_OUTPUT"
    else
      echo "ok=false" >> "$GITHUB_OUTPUT"
    fi
# --- Activate via EBL inside the same Unity image (writes host-side entitlement) ---
# Runs the Unity editor once in batch mode with the account credentials while the
# container's config and local-share directories are bind-mounted from $RUNNER_TEMP,
# so whatever Unity writes there remains on the host for later steps.
- name: Activate Unity (EBL via container - host-mount)
if: steps.lic.outputs.use_ebl == 'true'
shell: bash
env:
UNITY_IMAGE: ${{ env.UNITY_IMAGE }}
UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}
run: |
set -euxo pipefail
# host dirs to receive the full Unity config and local-share
mkdir -p "$RUNNER_TEMP/unity-config" "$RUNNER_TEMP/unity-local"
# Try Pro first if serial is present, otherwise named-user EBL.
# Unity's exit status is ignored (|| true); success is judged by the file check below.
docker run --rm --network host \
-e HOME=/root \
-e UNITY_EMAIL -e UNITY_PASSWORD -e UNITY_SERIAL \
-v "$RUNNER_TEMP/unity-config:/root/.config/unity3d" \
-v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d" \
"$UNITY_IMAGE" bash -lc '
set -euxo pipefail
if [[ -n "${UNITY_SERIAL:-}" ]]; then
/opt/unity/Editor/Unity -batchmode -nographics -logFile - \
-username "$UNITY_EMAIL" -password "$UNITY_PASSWORD" -serial "$UNITY_SERIAL" -quit || true
else
/opt/unity/Editor/Unity -batchmode -nographics -logFile - \
-username "$UNITY_EMAIL" -password "$UNITY_PASSWORD" -quit || true
fi
ls -la /root/.config/unity3d/Unity/licenses || true
'
# Verify entitlement written to host mount; allow ULF-only runs to proceed
# "Entitlement present" here means: any *.xml file exists under unity-config.
if ! find "$RUNNER_TEMP/unity-config" -type f -iname "*.xml" | grep -q .; then
if [[ "${{ steps.ulf.outputs.ok }}" == "true" ]]; then
echo "EBL entitlement not found; proceeding with ULF-only (ok=true)."
else
echo "No entitlement produced and no valid ULF; cannot continue." >&2
exit 1
fi
fi
# EBL entitlement is already written directly to $RUNNER_TEMP/unity-config by the activation step
# ---------- Warm up project (import Library once) ----------
# Opens the test project once in batch mode and quits, using the license material
# staged under $RUNNER_TEMP by the previous steps.
- name: Warm up project (import Library once)
if: steps.lic.outputs.use_ulf == 'true' || steps.lic.outputs.use_ebl == 'true'
shell: bash
env:
UNITY_IMAGE: ${{ env.UNITY_IMAGE }}
ULF_OK: ${{ steps.ulf.outputs.ok }}
run: |
set -euxo pipefail
# Pass -manualLicenseFile only when the ULF staging step reported ok=true.
manual_args=()
if [[ "${ULF_OK:-false}" == "true" ]]; then
manual_args=(-manualLicenseFile "/root/.local/share/unity3d/Unity/Unity_lic.ulf")
fi
docker run --rm --network host \
-e HOME=/root \
-v "${{ github.workspace }}:/workspace" -w /workspace \
-v "$RUNNER_TEMP/unity-config:/root/.config/unity3d" \
-v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d" \
"$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \
-projectPath /workspace/TestProjects/UnityMCPTests \
"${manual_args[@]}" \
-quit
# ---------- Clean old MCP status ----------
# Removes stale bridge status files so later steps cannot read a port left over
# from an earlier run.
- name: Clean old MCP status
  run: |
    set -eux
    mkdir -p "$HOME/.unity-mcp" "$RUNNER_TEMP/unity-status"
    rm -f "$HOME/.unity-mcp"/unity-mcp-status-*.json || true
    # The Unity container's status directory is bind-mounted from
    # $RUNNER_TEMP/unity-status, and the wait/verify steps read the port from
    # there, so that directory is the one that actually needs to be clean.
    rm -f "$RUNNER_TEMP/unity-status"/unity-mcp-status-*.json || true
# ---------- Start headless Unity (persistent bridge) ----------
# Launches the editor detached (-d) in a container named unity-mcp. There is no -quit:
# the process stays up after running the -executeMethod entry point.
- name: Start Unity (persistent bridge)
if: steps.lic.outputs.use_ulf == 'true' || steps.lic.outputs.use_ebl == 'true'
shell: bash
env:
UNITY_IMAGE: ${{ env.UNITY_IMAGE }}
ULF_OK: ${{ steps.ulf.outputs.ok }}
run: |
set -euxo pipefail
# Pass -manualLicenseFile only when the ULF staging step reported ok=true.
manual_args=()
if [[ "${ULF_OK:-false}" == "true" ]]; then
manual_args=(-manualLicenseFile "/root/.local/share/unity3d/Unity/Unity_lic.ulf")
fi
mkdir -p "$RUNNER_TEMP/unity-status"
# Remove any leftover container with the same name before starting a new one.
docker rm -f unity-mcp >/dev/null 2>&1 || true
# Status files land in $RUNNER_TEMP/unity-status on the host; license dirs are mounted read-only.
docker run -d --name unity-mcp --network host \
-e HOME=/root \
-e UNITY_MCP_ALLOW_BATCH=1 \
-e UNITY_MCP_STATUS_DIR=/root/.unity-mcp \
-e UNITY_MCP_BIND_HOST=127.0.0.1 \
-v "${{ github.workspace }}:/workspace" -w /workspace \
-v "$RUNNER_TEMP/unity-status:/root/.unity-mcp" \
-v "$RUNNER_TEMP/unity-config:/root/.config/unity3d:ro" \
-v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d:ro" \
"$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \
-stackTraceLogType Full \
-projectPath /workspace/TestProjects/UnityMCPTests \
"${manual_args[@]}" \
-executeMethod MCPForUnity.Editor.MCPForUnityBridge.StartAutoConnect
# ---------- Wait for Unity bridge ----------
# Polls every 2 s for up to 15 minutes. Succeeds when the status JSON names a port that
# accepts a TCP connection, or when the container log matches ok_pat. Fails when the
# container is not running, when a fatal licensing message appears after the first
# 2 minutes, or when the deadline passes. Log excerpts are printed through a sed
# filter that replaces credential-looking words with [REDACTED].
# NOTE(review): this step has no `if:`; when the start step was skipped there is no
# unity-mcp container and the in-loop "not running" check fails the step.
- name: Wait for Unity bridge (robust)
shell: bash
run: |
set -euo pipefail
deadline=$((SECONDS+900)) # 15 min max
fatal_after=$((SECONDS+120)) # give licensing 2 min to settle
# Fail fast only if container actually died
st="$(docker inspect -f '{{.State.Status}} {{.State.ExitCode}}' unity-mcp 2>/dev/null || true)"
case "$st" in exited*|dead*) docker logs unity-mcp --tail 200 | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig'; exit 1;; esac
# Patterns
ok_pat='(Bridge|MCP(For)?Unity|AutoConnect).*(listening|ready|started|port|bound)'
# Only truly fatal signals; allow transient "Licensing::..." chatter
license_fatal='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|denied)|License (activation|return|renewal).*(failed|expired|denied)'
while [ $SECONDS -lt $deadline ]; do
logs="$(docker logs unity-mcp 2>&1 || true)"
# 1) Primary: status JSON exposes TCP port
port="$(jq -r '.unity_port // empty' "$RUNNER_TEMP"/unity-status/unity-mcp-status-*.json 2>/dev/null | head -n1 || true)"
# Opening /dev/tcp/<host>/<port> is bash's built-in TCP connect; timeout caps it at 1 s.
if [[ -n "${port:-}" ]] && timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$port"; then
echo "Bridge ready on port $port"
exit 0
fi
# 2) Secondary: log markers
if echo "$logs" | grep -qiE "$ok_pat"; then
echo "Bridge ready (log markers)"
exit 0
fi
# Only treat license failures as fatal *after* warm-up
if [ $SECONDS -ge $fatal_after ] && echo "$logs" | grep -qiE "$license_fatal"; then
echo "::error::Fatal licensing signal detected after warm-up"
echo "$logs" | tail -n 200 | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig'
exit 1
fi
# If the container dies mid-wait, bail
st="$(docker inspect -f '{{.State.Status}}' unity-mcp 2>/dev/null || true)"
if [[ "$st" != "running" ]]; then
echo "::error::Unity container exited during wait"; docker logs unity-mcp --tail 200 | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig'
exit 1
fi
sleep 2
done
echo "::error::Bridge not ready before deadline"
docker logs unity-mcp --tail 200 | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig'
exit 1
# (moved) — return license after Unity is stopped
# ---------- MCP client config ----------
# Writes .claude/mcp.json describing one stdio MCP server named "unity", started with
# `uv run` from the server source directory.
- name: Write MCP config (.claude/mcp.json)
run: |
set -eux
mkdir -p .claude
# The heredoc delimiter is unquoted on purpose: $GITHUB_WORKSPACE and $RUNNER_TEMP
# are expanded by the shell before the JSON is written.
cat > .claude/mcp.json <<JSON
{
"mcpServers": {
"unity": {
"command": "uv",
"args": ["run","--active","--directory","UnityMcpBridge/UnityMcpServer~/src","python","server.py"],
"transport": { "type": "stdio" },
"env": {
"PYTHONUNBUFFERED": "1",
"MCP_LOG_LEVEL": "debug",
"UNITY_PROJECT_ROOT": "$GITHUB_WORKSPACE/TestProjects/UnityMCPTests",
"UNITY_MCP_STATUS_DIR": "$RUNNER_TEMP/unity-status",
"UNITY_MCP_HOST": "127.0.0.1"
}
}
}
}
JSON
# Writes .claude/settings.json with the tool permissions for the Claude passes.
# MultiEdit is allowed for reports/** only, matching the `allowed_tools` input of
# every pass and the system prompts that ask for a MultiEdit(reports/**) batch.
# The earlier blanket "MultiEdit" deny entry contradicted both, since a deny rule
# overrides any allow for the same tool.
- name: Pin Claude tool permissions (.claude/settings.json)
  run: |
    set -eux
    mkdir -p .claude
    cat > .claude/settings.json <<'JSON'
    {
      "permissions": {
        "allow": [
          "mcp__unity",
          "Edit(reports/**)",
          "MultiEdit(reports/**)"
        ],
        "deny": [
          "Bash",
          "WebFetch",
          "WebSearch",
          "Task",
          "TodoWrite",
          "NotebookEdit",
          "NotebookRead"
        ]
      }
    }
    JSON
# ---------- Reports & helper ----------
# Clears old top-level XML/markdown reports, then creates reports/ and its
# _snapshots and _staging subdirectories.
- name: Prepare reports and dirs
run: |
set -eux
rm -f reports/*.xml reports/*.md || true
mkdir -p reports reports/_snapshots reports/_staging
# Seeds $JUNIT_OUT with a single failing "NL-Suite.Bootstrap" testcase and $MD_OUT
# with a title line. The assemble step later drops the placeholder once real
# testcases have been appended.
- name: Create report skeletons
run: |
set -eu
# Quoted delimiter: the XML below is written verbatim, with no shell expansion.
cat > "$JUNIT_OUT" <<'XML'
<?xml version="1.0" encoding="UTF-8"?>
<testsuites><testsuite name="UnityMCP.NL-T" tests="1" failures="1" errors="0" skipped="0" time="0">
<testcase name="NL-Suite.Bootstrap" classname="UnityMCP.NL-T">
<failure message="bootstrap">Bootstrap placeholder; suite will append real tests.</failure>
</testcase>
</testsuite></testsuites>
XML
printf '# Unity NL/T Editing Suite Test Results\n\n' > "$MD_OUT"
# Diagnostic step: prints the status directory and status JSON, extracts the
# advertised unity_port, and tries a 1-second TCP connect to it.
- name: Verify Unity bridge status/port
run: |
set -euxo pipefail
ls -la "$RUNNER_TEMP/unity-status" || true
jq -r . "$RUNNER_TEMP"/unity-status/unity-mcp-status-*.json | sed -n '1,80p' || true
# nullglob: with no status files the array below is empty instead of holding the literal pattern.
shopt -s nullglob
status_files=("$RUNNER_TEMP"/unity-status/unity-mcp-status-*.json)
if ((${#status_files[@]})); then
port="$(grep -hEo '"unity_port"[[:space:]]*:[[:space:]]*[0-9]+' "${status_files[@]}" \
| sed -E 's/.*: *([0-9]+).*/\1/' | head -n1 || true)"
else
port=""
fi
echo "unity_port=$port"
# With set -e, a failed connect on a known port fails this step.
if [[ -n "$port" ]]; then
timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$port" && echo "TCP OK"
fi
# (removed) Revert helper and baseline snapshot are no longer used
# ---------- Run suite in two passes ----------
# NL pass: runs only when both the Anthropic key and Unity credentials were detected.
# continue-on-error lets the remaining steps run even if this pass fails.
- name: Run Claude NL pass
uses: anthropics/claude-code-base-action@beta
if: steps.detect.outputs.anthropic_ok == 'true' && steps.detect.outputs.unity_ok == 'true'
continue-on-error: true
with:
use_node_cache: false
prompt_file: .claude/prompts/nl-unity-suite-nl.md
mcp_config: .claude/mcp.json
settings: .claude/settings.json
# Tools are limited to the Unity MCP server plus edits under reports/.
allowed_tools: "mcp__unity,Edit(reports/**),MultiEdit(reports/**)"
disallowed_tools: "Bash,WebFetch,WebSearch,Task,TodoWrite,NotebookEdit,NotebookRead"
model: claude-3-7-sonnet-20250219
append_system_prompt: |
You are running the NL pass only.
- Emit exactly NL-0, NL-1, NL-2, NL-3, NL-4.
- Write each to reports/${ID}_results.xml.
- Prefer a single MultiEdit(reports/**) batch. Do not emit any T-* tests.
- Stop after NL-4_results.xml is written.
timeout_minutes: "30"
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
# T pass (T-A .. T-J): same gating and tool limits as the NL pass, with a different
# prompt file and model. continue-on-error lets the coverage check and retry run.
- name: Run Claude T pass A-J
uses: anthropics/claude-code-base-action@beta
if: steps.detect.outputs.anthropic_ok == 'true' && steps.detect.outputs.unity_ok == 'true'
continue-on-error: true
with:
use_node_cache: false
prompt_file: .claude/prompts/nl-unity-suite-t.md
mcp_config: .claude/mcp.json
settings: .claude/settings.json
allowed_tools: "mcp__unity,Edit(reports/**),MultiEdit(reports/**)"
disallowed_tools: "Bash,WebFetch,WebSearch,Task,TodoWrite,NotebookEdit,NotebookRead"
model: claude-3-5-haiku-20241022
append_system_prompt: |
You are running the T pass (A–J) only.
Output requirements:
- Emit exactly 10 test fragments: T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J.
- Write each fragment to reports/${ID}_results.xml (e.g., T-A_results.xml).
- Prefer a single MultiEdit(reports/**) call that writes all ten files in one batch.
- If MultiEdit is not used, emit individual writes for any missing IDs until all ten exist.
- Do not emit any NL-* fragments.
Stop condition:
- After T-J_results.xml is written, stop.
timeout_minutes: "30"
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
# (moved) Assert T coverage after staged fragments are promoted
# Counts the T fragments that are absent or empty in both reports/ and
# reports/_staging/. Outputs: `missing` (count) and, when non-zero, `list` (IDs).
- name: Check T coverage incomplete (pre-retry)
  id: t_cov
  if: always()
  shell: bash
  run: |
    set -euo pipefail
    absent=()
    for letter in A B C D E F G H I J; do
      frag="T-${letter}_results.xml"
      # A non-empty file in either location counts as present.
      if [[ -s "reports/$frag" || -s "reports/_staging/$frag" ]]; then
        continue
      fi
      absent+=("T-${letter}")
    done
    count=${#absent[@]}
    echo "missing=$count" >> "$GITHUB_OUTPUT"
    if [[ "$count" -gt 0 ]]; then
      echo "list=${absent[*]}" >> "$GITHUB_OUTPUT"
    fi
# Second attempt at the T pass with a stronger model, run only when fragments are
# still missing. It is gated on the same secret checks as the first two passes:
# without them, a run that lacks the Anthropic key or Unity credentials would skip
# both passes, report all ten fragments missing, and then start this step with an
# empty API key.
- name: Retry T pass (Sonnet) if incomplete
  if: >-
    steps.t_cov.outputs.missing != '0' &&
    steps.detect.outputs.anthropic_ok == 'true' &&
    steps.detect.outputs.unity_ok == 'true'
  uses: anthropics/claude-code-base-action@beta
  with:
    use_node_cache: false
    prompt_file: .claude/prompts/nl-unity-suite-t.md
    mcp_config: .claude/mcp.json
    settings: .claude/settings.json
    allowed_tools: "mcp__unity,Edit(reports/**),MultiEdit(reports/**)"
    disallowed_tools: "Bash,MultiEdit(/!(reports/**)),WebFetch,WebSearch,Task,TodoWrite,NotebookEdit,NotebookRead"
    model: claude-3-7-sonnet-20250219
    fallback_model: claude-3-5-haiku-20241022
    append_system_prompt: |
      You are running the T pass only.
      Output requirements:
      - Emit exactly 10 test fragments: T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J.
      - Write each fragment to reports/${ID}_results.xml (e.g., T-A_results.xml).
      - Prefer a single MultiEdit(reports/**) call that writes all ten files in one batch.
      - If MultiEdit is not used, emit individual writes for any missing IDs until all ten exist.
      - Do not emit any NL-* fragments.
      Stop condition:
      - After T-J_results.xml is written, stop.
    timeout_minutes: "30"
    anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
# Fails the step (exit 1) if any T fragment is absent or empty in reports/.
# Unlike the pre-retry check, fragments under reports/_staging/ are not accepted here.
# Later steps carry `if: always()` and therefore still run after a failure.
- name: Re-assert T coverage (post-retry)
if: always()
shell: bash
run: |
set -euo pipefail
missing=()
for id in T-A T-B T-C T-D T-E T-F T-G T-H T-I T-J; do
[[ -s "reports/${id}_results.xml" ]] || missing+=("$id")
done
if (( ${#missing[@]} )); then
echo "::error::Still missing T fragments: ${missing[*]}"
exit 1
fi
# (kept) Finalize staged report fragments (promote to reports/)
# (removed duplicate) Finalize staged report fragments
# Fails the step if any T fragment is absent or empty in both reports/ and
# reports/_staging/.
# NOTE(review): no step that promotes _staging fragments is visible in this file,
# although the step name and nearby comments refer to one - confirm it still exists.
- name: Assert T coverage (after promotion)
if: always()
shell: bash
run: |
set -euo pipefail
missing=()
for id in T-A T-B T-C T-D T-E T-F T-G T-H T-I T-J; do
if [[ ! -s "reports/${id}_results.xml" ]]; then
# Accept staged fragment as present
[[ -s "reports/_staging/${id}_results.xml" ]] || missing+=("$id")
fi
done
if (( ${#missing[@]} )); then
echo "::error::Missing T fragments: ${missing[*]}"
exit 1
fi
# Rewrites the `name` attribute of each reports/*_results.xml fragment whose root is
# a <testcase> so that it starts with its canonical ID ("NL-3 — Title", "T-D — Title").
# The ID comes from the fragment's filename when it can be derived, otherwise from
# keyword rules applied to the existing name.
- name: Canonicalize testcase names (NL/T prefixes)
  if: always()
  shell: bash
  run: |
    python3 - <<'PY'
    from pathlib import Path
    import xml.etree.ElementTree as ET, re

    # (canonical ID, pattern) pairs tried in order against a testcase name.
    RULES = [
      ("NL-0", r"\b(NL-0|Baseline|State\s*Capture)\b"),
      ("NL-1", r"\b(NL-1|Core\s*Method)\b"),
      ("NL-2", r"\b(NL-2|Anchor|Build\s*marker)\b"),
      ("NL-3", r"\b(NL-3|End[-\s]*of[-\s]*Class\s*Content|Tail\s*test\s*[ABC])\b"),
      ("NL-4", r"\b(NL-4|Console|Unity\s*console)\b"),
      ("T-A", r"\b(T-?A|Temporary\s*Helper)\b"),
      ("T-B", r"\b(T-?B|Method\s*Body\s*Interior)\b"),
      ("T-C", r"\b(T-?C|Different\s*Method\s*Interior|ApplyBlend)\b"),
      ("T-D", r"\b(T-?D|End[-\s]*of[-\s]*Class\s*Helper|TestHelper)\b"),
      ("T-E", r"\b(T-?E|Method\s*Evolution|Counter|IncrementCounter)\b"),
      ("T-F", r"\b(T-?F|Atomic\s*Multi[-\s]*Edit)\b"),
      ("T-G", r"\b(T-?G|Path\s*Normalization)\b"),
      ("T-H", r"\b(T-?H|Validation\s*on\s*Modified)\b"),
      ("T-I", r"\b(T-?I|Failure\s*Surface)\b"),
      ("T-J", r"\b(T-?J|Idempotenc(y|e))\b"),
    ]

    def canon_name(name: str) -> str:
        """Return `name` normalized by the first matching rule, or unchanged if none match."""
        n = name or ""
        for tid, pat in RULES:
            if re.search(pat, n, flags=re.I):
                # If it already starts with the correct format, leave it alone
                if re.match(rf'^\s*{re.escape(tid)}\s*[—–-]', n, flags=re.I):
                    return n.strip()
                # If it has a different separator, extract title and reformat
                title_match = re.search(rf'{re.escape(tid)}\s*[:.\-–—]\s*(.+)', n, flags=re.I)
                if title_match:
                    title = title_match.group(1).strip()
                    return f"{tid} — {title}"
                # Otherwise, just return the canonical ID
                return tid
        return n

    def id_from_filename(p: Path):
        """Derive "NL-<n>" / "T-<A..J>" from a fragment filename, or None.

        The hyphen is optional so that both the names the suite writes
        (NL-0_results.xml, T-A_results.xml) and the compact form
        (NL0_results.xml, TA_results.xml) are recognized.
        """
        n = p.name
        m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I)
        if m:
            return f"NL-{int(m.group(1))}"
        m = re.match(r'T-?([A-J])_results\.xml$', n, re.I)
        if m:
            return f"T-{m.group(1).upper()}"
        return None

    frags = list(sorted(Path("reports").glob("*_results.xml")))
    for frag in frags:
        try:
            tree = ET.parse(frag); root = tree.getroot()
        except Exception:
            # Unparseable fragments are left untouched for the assemble step to salvage.
            continue
        if root.tag != "testcase":
            continue
        file_id = id_from_filename(frag)
        old = root.get("name") or ""
        # Prefer filename-derived ID; if name doesn't start with it, override
        if file_id:
            # Respect file's ID (prevents T-D being renamed to NL-3 by loose patterns).
            # The separator is optional so a bare "T-A" yields an empty title
            # rather than "T-A — T-A".
            title = re.sub(r'^\s*(NL-\d+|T-[A-Z])\b\s*[—–:\-]?\s*', '', old).strip()
            new = f"{file_id} — {title}" if title else file_id
        else:
            new = canon_name(old)
        if new != old and new:
            root.set("name", new)
            tree.write(frag, encoding="utf-8", xml_declaration=False)
            print(f'canon: {frag.name}: "{old}" -> "{new}"')
    # Note: Do not auto-relabel fragments. We rely on per-test strict emission
    # and the backfill step to surface missing tests explicitly.
    PY
# Writes a failing placeholder fragment for every desired test ID that no existing
# fragment accounts for, so missing tests show up as failures in the report.
- name: Backfill missing NL/T tests (fail placeholders)
if: always()
shell: bash
run: |
python3 - <<'PY'
from pathlib import Path
import xml.etree.ElementTree as ET
import re
# The full set of test IDs the suite is expected to produce.
DESIRED = ["NL-0","NL-1","NL-2","NL-3","NL-4","T-A","T-B","T-C","T-D","T-E","T-F","T-G","T-H","T-I","T-J"]
# IDs for which a fragment has been found.
seen = set()
def id_from_filename(p: Path):
    """Derive the canonical test ID from a fragment filename.

    Accepts the hyphenated names the suite writes (NL-0_results.xml,
    T-A_results.xml) as well as the compact form (NL0_results.xml,
    TA_results.xml); matching is case-insensitive.

    Returns "NL-<n>" or "T-<A..J>", or None for any other filename.
    """
    n = p.name
    m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I)
    if m:
        return f"NL-{int(m.group(1))}"
    m = re.match(r'T-?([A-J])_results\.xml$', n, re.I)
    if m:
        return f"T-{m.group(1).upper()}"
    return None
# Pass 1: record which desired IDs already have a parseable fragment.
for p in Path("reports").glob("*_results.xml"):
try:
r = ET.parse(p).getroot()
except Exception:
# Unparseable fragments do not count as seen.
continue
# Count by filename id primarily; fall back to testcase name if needed
fid = id_from_filename(p)
if fid in DESIRED:
seen.add(fid)
continue
if r.tag == "testcase":
name = (r.get("name") or "").strip()
for d in DESIRED:
if name.startswith(d):
seen.add(d)
break
# Pass 2: write a failing <testcase> placeholder for every ID not seen.
Path("reports").mkdir(parents=True, exist_ok=True)
for d in DESIRED:
if d in seen:
continue
frag = Path(f"reports/{d}_results.xml")
tc = ET.Element("testcase", {"classname":"UnityMCP.NL-T", "name": d})
fail = ET.SubElement(tc, "failure", {"message":"not produced"})
fail.text = "The agent did not emit a fragment for this test."
ET.ElementTree(tc).write(frag, encoding="utf-8", xml_declaration=False)
print(f"backfill: {d}")
PY
# Diagnostic step: prints "<file>: <testcase name>" for each fragment whose root is a
# <testcase>. Files that fail to parse are skipped silently.
- name: "Debug: list testcase names"
if: always()
run: |
python3 - <<'PY'
from pathlib import Path
import xml.etree.ElementTree as ET
for p in sorted(Path('reports').glob('*_results.xml')):
try:
r = ET.parse(p).getroot()
if r.tag == 'testcase':
print(f"{p.name}: {(r.get('name') or '').strip()}")
except Exception:
pass
PY
# ---------- Merge testcase fragments into JUnit ----------
# Appends the testcase from each reports/*_results.xml fragment to the suite stored
# in $JUNIT_OUT and rewrites that file in place.
- name: Normalize/assemble JUnit in-place (single file)
if: always()
shell: bash
run: |
python3 - <<'PY'
from pathlib import Path
import xml.etree.ElementTree as ET
import re, os
# Strip an XML namespace prefix ("{ns}tag" -> "tag").
def localname(tag: str) -> str:
return tag.rsplit('}', 1)[-1] if '}' in tag else tag
src = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml'))
# Nothing to assemble into: exit successfully (a later step writes a fallback report).
if not src.exists():
raise SystemExit(0)
tree = ET.parse(src)
root = tree.getroot()
# For a <testsuites> root use its first child as the suite; otherwise the root itself.
suite = root.find('./*') if localname(root.tag) == 'testsuites' else root
if suite is None:
raise SystemExit(0)
def id_from_filename(p: Path):
    """Derive the canonical test ID from a fragment filename.

    Accepts the hyphenated names the suite writes (NL-0_results.xml,
    T-A_results.xml) as well as the compact form (NL0_results.xml,
    TA_results.xml); matching is case-insensitive.

    Returns "NL-<n>" or "T-<A..J>", or None for any other filename.
    """
    n = p.name
    m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I)
    if m:
        return f"NL-{int(m.group(1))}"
    m = re.match(r'T-?([A-J])_results\.xml$', n, re.I)
    if m:
        return f"T-{m.group(1).upper()}"
    return None
# Fallback ID source: the first "NL-<n>" / "T-<X>" token found in the testcase's <system-out> text.
def id_from_system_out(tc):
so = tc.find('system-out')
if so is not None and so.text:
m = re.search(r'\b(NL-\d+|T-[A-Z])\b', so.text)
if m:
return m.group(1)
return None
fragments = sorted(Path('reports').glob('*_results.xml'))
added = 0
renamed = 0
for frag in fragments:
tcs = []
try:
froot = ET.parse(frag).getroot()
if localname(froot.tag) == 'testcase':
tcs = [froot]
else:
tcs = list(froot.findall('.//testcase'))
except Exception:
# The fragment is not well-formed XML: salvage <testcase>...</testcase> spans from the raw text.
txt = Path(frag).read_text(encoding='utf-8', errors='replace')
# Extract all testcase nodes from raw text
nodes = re.findall(r'<testcase[\s\S]*?</testcase>', txt, flags=re.DOTALL)
for m in nodes:
try:
tcs.append(ET.fromstring(m))
except Exception:
pass
# Guard: keep only the first testcase from each fragment
if len(tcs) > 1:
tcs = tcs[:1]
test_id = id_from_filename(frag)
for tc in tcs:
current_name = tc.get('name') or ''
# The filename-derived ID wins; <system-out> is consulted only when the filename gives none.
tid = test_id or id_from_system_out(tc)
# Enforce filename-derived ID as prefix; repair names if needed
if tid and not re.match(r'^\s*(NL-\d+|T-[A-Z])\b', current_name):
title = current_name.strip()
new_name = f'{tid} — {title}' if title else tid
tc.set('name', new_name)
elif tid and not re.match(rf'^\s*{re.escape(tid)}\b', current_name):
# Replace any wrong leading ID with the correct one
title = re.sub(r'^\s*(NL-\d+|T-[A-Z])\s*[—–:\-]\s*', '', current_name).strip()
new_name = f'{tid} — {title}' if title else tid
tc.set('name', new_name)
renamed += 1
suite.append(tc)
added += 1
if added:
# Drop bootstrap placeholder and recompute counts
for tc in list(suite.findall('.//testcase')):
if (tc.get('name') or '') == 'NL-Suite.Bootstrap':
suite.remove(tc)
testcases = suite.findall('.//testcase')
# A testcase counts as failed when it has a <failure> or an <error> child.
failures_cnt = sum(1 for tc in testcases if (tc.find('failure') is not None or tc.find('error') is not None))
suite.set('tests', str(len(testcases)))
suite.set('failures', str(failures_cnt))
suite.set('errors', '0')
suite.set('skipped', '0')
tree.write(src, encoding='utf-8', xml_declaration=True)
print(f"Appended {added} testcase(s); renamed {renamed} to canonical NL/T names.")
PY
# ---------- Markdown summary from JUnit ----------
# Renders $JUNIT_OUT as markdown in $MD_OUT: totals, a checklist over the expected
# test IDs, then a details section per testcase (system-out excerpt plus the failure
# message, if any).
- name: Build markdown summary from JUnit
  if: always()
  shell: bash
  run: |
    python3 - <<'PY'
    import xml.etree.ElementTree as ET
    from pathlib import Path
    import os, html, re

    def localname(tag: str) -> str:
        """Strip an XML namespace prefix ("{ns}tag" -> "tag")."""
        return tag.rsplit('}', 1)[-1] if '}' in tag else tag

    src = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml'))
    md_out = Path(os.environ.get('MD_OUT', 'reports/junit-nl-suite.md'))
    md_out.parent.mkdir(parents=True, exist_ok=True)
    if not src.exists():
        md_out.write_text("# Unity NL/T Editing Suite Test Results\n\n(No JUnit found)\n", encoding='utf-8')
        raise SystemExit(0)

    tree = ET.parse(src)
    root = tree.getroot()
    suite = root.find('./*') if localname(root.tag) == 'testsuites' else root
    cases = [] if suite is None else list(suite.findall('.//testcase'))

    def id_from_case(tc):
        """Return the test ID from the start of the name, else from <system-out>, else None."""
        n = (tc.get('name') or '')
        m = re.match(r'\s*(NL-\d+|T-[A-Z])\b', n)
        if m:
            return m.group(1)
        so = tc.find('system-out')
        if so is not None and so.text:
            m = re.search(r'\b(NL-\d+|T-[A-Z])\b', so.text)
            if m:
                return m.group(1)
        return None

    # The first testcase seen for an ID decides its status and display name.
    id_status = {}
    name_map = {}
    for tc in cases:
        tid = id_from_case(tc)
        ok = (tc.find('failure') is None and tc.find('error') is None)
        if tid and tid not in id_status:
            id_status[tid] = ok
            name_map[tid] = (tc.get('name') or tid)

    desired = ['NL-0','NL-1','NL-2','NL-3','NL-4','T-A','T-B','T-C','T-D','T-E','T-F','T-G','T-H','T-I','T-J']
    total = len(cases)
    failures = sum(1 for tc in cases if (tc.find('failure') is not None or tc.find('error') is not None))
    passed = total - failures

    lines = []
    lines += [
        '# Unity NL/T Editing Suite Test Results',
        '',
        f'Totals: {passed} passed, {failures} failed, {total} total',
        '',
        '## Test Checklist'
    ]
    for p in desired:
        st = id_status.get(p, None)
        lines.append(f"- [x] {p}" if st is True else (f"- [ ] {p} (fail)" if st is False else f"- [ ] {p} (not run)"))
    lines.append('')
    lines.append('## Test Details')

    def order_key(n: str):
        """Sort NL-<n> numerically first, then T-<X> alphabetically, then anything else."""
        if n.startswith('NL-'):
            try:
                return (0, int(n.split('-')[1]))
            except (ValueError, IndexError):
                return (0, 999)
        if n.startswith('T-') and len(n) > 2:
            return (1, ord(n[2]))
        return (2, n)

    MAX_CHARS = 2000
    seen = set()
    for tid in sorted(id_status.keys(), key=order_key):
        seen.add(tid)
        tc = next((c for c in cases if (id_from_case(c) == tid)), None)
        # Compare with None explicitly: an Element without child elements is falsy,
        # so `if not tc` silently dropped passing testcases that had no children.
        if tc is None:
            continue
        title = name_map.get(tid, tid)
        status_badge = "PASS" if id_status[tid] else "FAIL"
        lines.append(f"### {title} — {status_badge}")
        so = tc.find('system-out')
        text = '' if so is None or so.text is None else html.unescape(so.text.replace('\r\n','\n'))
        if text.strip():
            t = text.strip()
            if len(t) > MAX_CHARS:
                t = t[:MAX_CHARS] + "\n…(truncated)"
            # Use a longer fence when the text itself contains a triple backtick.
            fence = '```' if '```' not in t else '````'
            lines += [fence, t, fence]
        else:
            lines.append('(no system-out)')
        # Same truthiness trap: `find('failure') or find('error')` discarded a
        # text-only <failure> element and fell through to <error> (usually None),
        # so failure messages never reached the summary.
        node = tc.find('failure')
        if node is None:
            node = tc.find('error')
        if node is not None:
            msg = (node.get('message') or '').strip()
            body = (node.text or '').strip()
            if msg:
                lines.append(f"- Message: {msg}")
            if body:
                lines.append(f"- Detail: {body.splitlines()[0][:500]}")
        lines.append('')

    # Testcases whose ID could not be determined are listed last.
    for tc in cases:
        if id_from_case(tc) in seen:
            continue
        title = tc.get('name') or '(unnamed)'
        status_badge = "PASS" if (tc.find('failure') is None and tc.find('error') is None) else "FAIL"
        lines.append(f"### {title} — {status_badge}")
        lines.append('(unmapped test id)')
        lines.append('')

    md_out.write_text('\n'.join(lines), encoding='utf-8')
    PY
# Diagnostic step: lists reports/ and prints the first 40 lines of each XML file in it.
- name: "Debug: list report files"
if: always()
shell: bash
run: |
set -eux
ls -la reports || true
# nullglob: with no XML files the loop body is skipped instead of running on the literal pattern.
shopt -s nullglob
for f in reports/*.xml; do
echo "===== $f ====="
head -n 40 "$f" || true
done
# ---------- Collect execution transcript (if present) ----------
# Copies claude-execution-output.json into reports/ from $RUNNER_TEMP, or from the
# hard-coded runner temp path if it is only found there. Does nothing when neither exists.
- name: Collect action execution transcript
if: always()
shell: bash
run: |
set -eux
if [ -f "$RUNNER_TEMP/claude-execution-output.json" ]; then
cp "$RUNNER_TEMP/claude-execution-output.json" reports/claude-execution-output.json
elif [ -f "/home/runner/work/_temp/claude-execution-output.json" ]; then
cp "/home/runner/work/_temp/claude-execution-output.json" reports/claude-execution-output.json
fi
# Rewrites every reports/*.md file with NUL bytes removed, invalid UTF-8 replaced,
# and CRLF line endings converted to LF.
- name: Sanitize markdown (normalize newlines)
if: always()
run: |
set -eu
python3 - <<'PY'
from pathlib import Path
rp=Path('reports'); rp.mkdir(parents=True, exist_ok=True)
for p in rp.glob('*.md'):
b=p.read_bytes().replace(b'\x00', b'')
s=b.decode('utf-8','replace').replace('\r\n','\n')
# newline='\n' keeps write_text from translating line endings on output.
p.write_text(s, encoding='utf-8', newline='\n')
PY
# Appends the markdown report to the job summary, truncated to 65000 characters.
# The report path comes from MD_OUT (as in the step that writes it) instead of a
# hard-coded copy, and $GITHUB_STEP_SUMMARY is quoted so the redirect target is
# never word-split.
- name: NL/T details -> Job Summary
  if: always()
  run: |
    echo "## Unity NL/T Editing Suite — Summary" >> "$GITHUB_STEP_SUMMARY"
    python3 - <<'PY' >> "$GITHUB_STEP_SUMMARY"
    import os
    from pathlib import Path
    p = Path(os.environ.get('MD_OUT', 'reports/junit-nl-suite.md'))
    if p.exists():
        text = p.read_bytes().decode('utf-8', 'replace')
        MAX = 65000
        print(text[:MAX])
        if len(text) > MAX:
            print("\n\n_…truncated; full report in artifacts._")
    else:
        print("_No markdown report found._")
    PY
# Guarantees $JUNIT_OUT exists for the publish step: when it is missing, writes a
# one-testcase suite whose single case fails with an explanatory message.
- name: Fallback JUnit if missing
if: always()
run: |
set -eu
mkdir -p reports
if [ ! -f "$JUNIT_OUT" ]; then
# Each printf argument becomes one line of the file.
printf '%s\n' \
'<?xml version="1.0" encoding="UTF-8"?>' \
'<testsuite name="UnityMCP.NL-T" tests="1" failures="1" time="0">' \
' <testcase classname="UnityMCP.NL-T" name="NL-Suite.Execution" time="0.0">' \
' <failure><![CDATA[No JUnit was produced by the NL suite step. See the step logs.]]></failure>' \
' </testcase>' \
'</testsuite>' \
> "$JUNIT_OUT"
fi
# Publishes $JUNIT_OUT through the JUnit report action. A parse error fails the
# step (fail_on_parse_error), while an empty report does not (require_tests: false).
- name: Publish JUnit report
if: always()
uses: mikepenz/action-junit-report@v5
with:
report_paths: '${{ env.JUNIT_OUT }}'
include_passed: true
detailed_summary: true
annotate_notice: true
require_tests: false
fail_on_parse_error: true
# Uploads the merged JUnit file, the markdown report, the per-test fragments and the
# execution transcript as one artifact kept for 7 days.
- name: Upload artifacts (reports + fragments + transcript)
if: always()
uses: actions/upload-artifact@v4
with:
name: claude-nl-suite-artifacts
path: |
${{ env.JUNIT_OUT }}
${{ env.MD_OUT }}
reports/*_results.xml
reports/claude-execution-output.json
retention-days: 7
# ---------- Always stop Unity ----------
# Prints the last 400 log lines of the unity-mcp container with credential-looking
# words replaced by [REDACTED], then force-removes the container. Both commands
# tolerate the container not existing.
- name: Stop Unity
if: always()
run: |
docker logs --tail 400 unity-mcp | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true
docker rm -f unity-mcp || true
# Runs only when credentials-based activation was used and a serial was supplied.
# A failure to return the license does not fail the job (continue-on-error).
- name: Return Pro license (if used)
if: always() && steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true'
uses: game-ci/unity-return-license@v2
continue-on-error: true
env:
UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}