Claude NL/T Full Suite (Unity live) #7

Summary
Jobs
- nl-suite
Run details
- Usage
- Workflow file

Workflow file for this run

.github/workflows/claude-nl-suite.yml at 9d17061

	# Manually dispatched run of the Claude NL/T editing suite against a live,
	# headless Unity editor started from a Docker image.
	name: Claude NL/T Full Suite (Unity live)

	on: [workflow_dispatch]

	# Token scopes granted to the job: read the repository, write check runs.
	permissions:
	  contents: read
	  checks: write

	# One run per workflow + ref; a newer dispatch cancels the run in flight.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}
	  cancel-in-progress: true

	env:
	  # Unity editor image used by the activation, warm-up and bridge containers.
	  UNITY_IMAGE: unityci/editor:ubuntu-2021.3.45f1-linux-il2cpp-3

	jobs:
	  nl-suite:
	    runs-on: ubuntu-latest
	    timeout-minutes: 60
	    env:
	      # Aggregated JUnit report and the markdown summary rendered from it.
	      JUNIT_OUT: reports/junit-nl-suite.xml
	      MD_OUT: reports/junit-nl-suite.md

	    steps:
	# ---------- Secrets check ----------
	# Publishes anthropic_ok / unity_ok step outputs so later steps can be gated
	# on which credentials are configured. Only true/false is written, never a
	# secret value.
	- name: Detect secrets (outputs)
	  id: detect
	  env:
	    UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }}
	    UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
	    UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
	    UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}
	    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	  run: |
	    set -e
	    if [ -n "$ANTHROPIC_API_KEY" ]; then echo "anthropic_ok=true" >> "$GITHUB_OUTPUT"; else echo "anthropic_ok=false" >> "$GITHUB_OUTPUT"; fi
	    # Unity counts as available with a license file OR an email+password pair.
	    if [ -n "$UNITY_LICENSE" ] || { [ -n "$UNITY_EMAIL" ] && [ -n "$UNITY_PASSWORD" ]; }; then
	      echo "unity_ok=true" >> "$GITHUB_OUTPUT"
	    else
	      echo "unity_ok=false" >> "$GITHUB_OUTPUT"
	    fi

	# Check out the repository with full history (fetch-depth: 0).
	- uses: actions/checkout@v4
	  with:
	    fetch-depth: 0

	# ---------- Python env for MCP server (uv) ----------
	# The version is quoted so YAML keeps it a string rather than a float.
	- uses: astral-sh/setup-uv@v4
	  with:
	    python-version: '3.11'

	# Creates a uv virtualenv, exports it to later steps via GITHUB_ENV and
	# GITHUB_PATH, then installs the MCP server from the first manifest found.
	- name: Install MCP server
	  run: |
	    set -eux
	    uv venv
	    echo "VIRTUAL_ENV=$GITHUB_WORKSPACE/.venv" >> "$GITHUB_ENV"
	    echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"
	    # Probe order: src/pyproject.toml, src/requirements.txt, then the same
	    # two files one directory up.
	    if [ -f UnityMcpBridge/UnityMcpServer~/src/pyproject.toml ]; then
	      uv pip install -e UnityMcpBridge/UnityMcpServer~/src
	    elif [ -f UnityMcpBridge/UnityMcpServer~/src/requirements.txt ]; then
	      uv pip install -r UnityMcpBridge/UnityMcpServer~/src/requirements.txt
	    elif [ -f UnityMcpBridge/UnityMcpServer~/pyproject.toml ]; then
	      uv pip install -e UnityMcpBridge/UnityMcpServer~/
	    elif [ -f UnityMcpBridge/UnityMcpServer~/requirements.txt ]; then
	      uv pip install -r UnityMcpBridge/UnityMcpServer~/requirements.txt
	    else
	      echo "No MCP Python deps found (skipping)"
	    fi

	# --- Licensing: allow both ULF and EBL when available ---
	# Outputs:
	#   use_ulf    - UNITY_LICENSE is set
	#   use_ebl    - both UNITY_EMAIL and UNITY_PASSWORD are set
	#   has_serial - UNITY_SERIAL is set
	- name: Decide license sources
	  id: lic
	  shell: bash
	  env:
	    UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }}
	    UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
	    UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
	    UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}
	  run: |
	    set -eu
	    use_ulf=false; use_ebl=false
	    [[ -n "${UNITY_LICENSE:-}" ]] && use_ulf=true
	    [[ -n "${UNITY_EMAIL:-}" && -n "${UNITY_PASSWORD:-}" ]] && use_ebl=true
	    echo "use_ulf=$use_ulf" >> "$GITHUB_OUTPUT"
	    echo "use_ebl=$use_ebl" >> "$GITHUB_OUTPUT"
	    echo "has_serial=$([[ -n "${UNITY_SERIAL:-}" ]] && echo true || echo false)" >> "$GITHUB_OUTPUT"

	# Writes the UNITY_LICENSE secret to $RUNNER_TEMP and classifies it.
	# Output ok=true only when the file contains a <Signature> element; in that
	# case it is also copied to unity-local/Unity/Unity_lic.ulf, the directory
	# later mounted into the Unity containers.
	- name: Stage Unity .ulf license (from secret)
	  if: steps.lic.outputs.use_ulf == 'true'
	  id: ulf
	  env:
	    UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }}
	  shell: bash
	  run: |
	    set -eu
	    mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-local/Unity"
	    f="$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf"
	    # The secret may be base64-encoded or raw text: decode when it decodes
	    # cleanly, otherwise store it verbatim.
	    if printf "%s" "$UNITY_LICENSE" | base64 -d - >/dev/null 2>&1; then
	      printf "%s" "$UNITY_LICENSE" | base64 -d - > "$f"
	    else
	      printf "%s" "$UNITY_LICENSE" > "$f"
	    fi
	    chmod 600 "$f" || true
	    # If someone pasted an entitlement XML into UNITY_LICENSE by mistake, re-home it:
	    # NOTE(review): in GNU grep's basic syntax `\?` makes the preceding `<`
	    # optional, so this matches any "xml" in the first 100 bytes rather than
	    # a literal "<?xml" - confirm that is intended.
	    if head -c 100 "$f" | grep -qi '<\?xml'; then
	      mkdir -p "$RUNNER_TEMP/unity-config/Unity/licenses"
	      mv "$f" "$RUNNER_TEMP/unity-config/Unity/licenses/UnityEntitlementLicense.xml"
	      echo "ok=false" >> "$GITHUB_OUTPUT"
	    elif grep -qi '<Signature>' "$f"; then
	      # provide it in the standard local-share path too
	      cp -f "$f" "$RUNNER_TEMP/unity-local/Unity/Unity_lic.ulf"
	      echo "ok=true" >> "$GITHUB_OUTPUT"
	    else
	      echo "ok=false" >> "$GITHUB_OUTPUT"
	    fi

	# --- Activate via EBL inside the same Unity image (writes host-side entitlement) ---
	# Runs Unity once in the container with the account credentials; the config
	# and local-share directories are host mounts, so whatever activation writes
	# lands under $RUNNER_TEMP for the later containers to reuse.
	- name: Activate Unity (EBL via container - host-mount)
	  if: steps.lic.outputs.use_ebl == 'true'
	  shell: bash
	  env:
	    UNITY_IMAGE: ${{ env.UNITY_IMAGE }}
	    UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
	    UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
	    UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}
	    # Result of the ULF staging step; empty when that step was skipped.
	    ULF_OK: ${{ steps.ulf.outputs.ok }}
	  run: |
	    set -euxo pipefail
	    # host dirs to receive the full Unity config and local-share
	    mkdir -p "$RUNNER_TEMP/unity-config" "$RUNNER_TEMP/unity-local"

	    # Try Pro first if serial is present, otherwise named-user EBL.
	    # The inner script deliberately omits `set -x`: its command lines carry
	    # the email, password and serial, which xtrace would print expanded.
	    # Activation failures are tolerated (|| true); the check below decides.
	    docker run --rm --network host \
	      -e HOME=/root \
	      -e UNITY_EMAIL -e UNITY_PASSWORD -e UNITY_SERIAL \
	      -v "$RUNNER_TEMP/unity-config:/root/.config/unity3d" \
	      -v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d" \
	      "$UNITY_IMAGE" bash -lc '
	        set -euo pipefail
	        if [[ -n "${UNITY_SERIAL:-}" ]]; then
	          /opt/unity/Editor/Unity -batchmode -nographics -logFile - \
	            -username "$UNITY_EMAIL" -password "$UNITY_PASSWORD" -serial "$UNITY_SERIAL" -quit || true
	        else
	          /opt/unity/Editor/Unity -batchmode -nographics -logFile - \
	            -username "$UNITY_EMAIL" -password "$UNITY_PASSWORD" -quit || true
	        fi
	        ls -la /root/.config/unity3d/Unity/licenses || true
	      '

	    # Verify entitlement written to host mount; allow ULF-only runs to proceed
	    if ! find "$RUNNER_TEMP/unity-config" -type f -iname "*.xml" | grep -q .; then
	      if [[ "${ULF_OK:-false}" == "true" ]]; then
	        echo "EBL entitlement not found; proceeding with ULF-only (ok=true)."
	      else
	        echo "No entitlement produced and no valid ULF; cannot continue." >&2
	        exit 1
	      fi
	    fi

	# EBL entitlement is already written directly to $RUNNER_TEMP/unity-config by the activation step

	# ---------- Warm up project (import Library once) ----------
	# Opens the test project once in batch mode and quits. The workspace is
	# mounted read-write, so what Unity generates stays in the checkout.
	- name: Warm up project (import Library once)
	  if: steps.lic.outputs.use_ulf == 'true' || steps.lic.outputs.use_ebl == 'true'
	  shell: bash
	  env:
	    UNITY_IMAGE: ${{ env.UNITY_IMAGE }}
	    ULF_OK: ${{ steps.ulf.outputs.ok }}
	  run: |
	    set -euxo pipefail
	    # Pass -manualLicenseFile only when the staged ULF was validated.
	    manual_args=()
	    if [[ "${ULF_OK:-false}" == "true" ]]; then
	      manual_args=(-manualLicenseFile "/root/.local/share/unity3d/Unity/Unity_lic.ulf")
	    fi
	    docker run --rm --network host \
	      -e HOME=/root \
	      -v "${{ github.workspace }}:/workspace" -w /workspace \
	      -v "$RUNNER_TEMP/unity-config:/root/.config/unity3d" \
	      -v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d" \
	      "$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \
	        -projectPath /workspace/TestProjects/UnityMCPTests \
	        "${manual_args[@]}" \
	        -quit

	# ---------- Clean old MCP status ----------
	# Removes status files under $HOME/.unity-mcp; a missing match is not an error.
	- name: Clean old MCP status
	  run: |
	    set -eux
	    mkdir -p "$HOME/.unity-mcp"
	    rm -f "$HOME/.unity-mcp"/unity-mcp-status-*.json || true

	# ---------- Start headless Unity (persistent bridge) ----------
	# Starts a detached container named unity-mcp on the host network. The
	# status directory is mounted read-write so the runner can read the status
	# JSON; the license/config mounts are read-only.
	- name: Start Unity (persistent bridge)
	  if: steps.lic.outputs.use_ulf == 'true' || steps.lic.outputs.use_ebl == 'true'
	  shell: bash
	  env:
	    UNITY_IMAGE: ${{ env.UNITY_IMAGE }}
	    ULF_OK: ${{ steps.ulf.outputs.ok }}
	  run: |
	    set -euxo pipefail
	    # Pass -manualLicenseFile only when the staged ULF was validated.
	    manual_args=()
	    if [[ "${ULF_OK:-false}" == "true" ]]; then
	      manual_args=(-manualLicenseFile "/root/.local/share/unity3d/Unity/Unity_lic.ulf")
	    fi

	    mkdir -p "$RUNNER_TEMP/unity-status"
	    # Remove any container left over under the same name.
	    docker rm -f unity-mcp >/dev/null 2>&1 || true
	    docker run -d --name unity-mcp --network host \
	      -e HOME=/root \
	      -e UNITY_MCP_ALLOW_BATCH=1 \
	      -e UNITY_MCP_STATUS_DIR=/root/.unity-mcp \
	      -e UNITY_MCP_BIND_HOST=127.0.0.1 \
	      -v "${{ github.workspace }}:/workspace" -w /workspace \
	      -v "$RUNNER_TEMP/unity-status:/root/.unity-mcp" \
	      -v "$RUNNER_TEMP/unity-config:/root/.config/unity3d:ro" \
	      -v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d:ro" \
	      "$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \
	        -stackTraceLogType Full \
	        -projectPath /workspace/TestProjects/UnityMCPTests \
	        "${manual_args[@]}" \
	        -executeMethod MCPForUnity.Editor.MCPForUnityBridge.StartAutoConnect

	# ---------- Wait for Unity bridge ----------
	# Polls every 2 s for up to 15 minutes. Succeeds when the status JSON names
	# a port that accepts a TCP connection, or when the container log shows a
	# readiness marker. Fails on a fatal licensing message (after a 2-minute
	# grace period), when the container stops running, or at the deadline.
	- name: Wait for Unity bridge (robust)
	  shell: bash
	  run: |
	    set -euo pipefail
	    deadline=$((SECONDS+900)) # 15 min max
	    fatal_after=$((SECONDS+120)) # give licensing 2 min to settle

	    # Mask credential-looking tokens in any log excerpt that gets printed.
	    redact() { sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig'; }

	    # Fail fast only if container actually died
	    st="$(docker inspect -f '{{.State.Status}} {{.State.ExitCode}}' unity-mcp 2>/dev/null || true)"
	    # $st is "<status> <exit code>", so the status is matched as a prefix.
	    case "$st" in exited*|dead*) docker logs unity-mcp --tail 200 | redact; exit 1;; esac

	    # Patterns
	    ok_pat='(Bridge|MCP(For)?Unity|AutoConnect).*(listening|ready|started|port|bound)'
	    # Only truly fatal signals; allow transient "Licensing::..." chatter
	    license_fatal='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|denied)|License (activation|return|renewal).*(failed|expired|denied)'

	    while [ $SECONDS -lt $deadline ]; do
	      logs="$(docker logs unity-mcp 2>&1 || true)"

	      # 1) Primary: status JSON exposes TCP port
	      port="$(jq -r '.unity_port // empty' "$RUNNER_TEMP"/unity-status/unity-mcp-status-*.json 2>/dev/null | head -n1 || true)"
	      if [[ -n "${port:-}" ]] && timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$port"; then
	        echo "Bridge ready on port $port"
	        exit 0
	      fi

	      # 2) Secondary: log markers
	      # A here-string is used instead of `echo | grep -q`: under pipefail,
	      # grep -q exiting early can kill echo with SIGPIPE and turn a match
	      # into a failed pipeline.
	      if grep -qiE "$ok_pat" <<<"$logs"; then
	        echo "Bridge ready (log markers)"
	        exit 0
	      fi

	      # Only treat license failures as fatal after warm-up
	      if [ $SECONDS -ge $fatal_after ] && grep -qiE "$license_fatal" <<<"$logs"; then
	        echo "::error::Fatal licensing signal detected after warm-up"
	        tail -n 200 <<<"$logs" | redact
	        exit 1
	      fi

	      # If the container dies mid-wait, bail
	      st="$(docker inspect -f '{{.State.Status}}' unity-mcp 2>/dev/null || true)"
	      if [[ "$st" != "running" ]]; then
	        echo "::error::Unity container exited during wait"; docker logs unity-mcp --tail 200 | redact
	        exit 1
	      fi

	      sleep 2
	    done

	    echo "::error::Bridge not ready before deadline"
	    docker logs unity-mcp --tail 200 | redact
	    exit 1

	# (moved) — return license after Unity is stopped

	# ---------- MCP client config ----------
	# Writes .claude/mcp.json describing a stdio MCP server launched with uv.
	# The heredoc delimiter is unquoted on purpose: $GITHUB_WORKSPACE and
	# $RUNNER_TEMP are expanded by the shell before the file is written.
	- name: Write MCP config (.claude/mcp.json)
	  run: |
	    set -eux
	    mkdir -p .claude
	    cat > .claude/mcp.json <<JSON
	    {
	      "mcpServers": {
	        "unity": {
	          "command": "uv",
	          "args": ["run","--active","--directory","UnityMcpBridge/UnityMcpServer~/src","python","server.py"],
	          "transport": { "type": "stdio" },
	          "env": {
	            "PYTHONUNBUFFERED": "1",
	            "MCP_LOG_LEVEL": "debug",
	            "UNITY_PROJECT_ROOT": "$GITHUB_WORKSPACE/TestProjects/UnityMCPTests",
	            "UNITY_MCP_STATUS_DIR": "$RUNNER_TEMP/unity-status",
	            "UNITY_MCP_HOST": "127.0.0.1"
	          }
	        }
	      }
	    }
	    JSON

	# Writes .claude/settings.json with an allow/deny tool list. The heredoc
	# delimiter is quoted, so the JSON is written verbatim with no expansion.
	# NOTE(review): "MultiEdit" is denied here while the Claude steps list
	# MultiEdit in allowed_tools and their prompts prefer it - confirm which
	# setting takes precedence.
	- name: Pin Claude tool permissions (.claude/settings.json)
	  run: |
	    set -eux
	    mkdir -p .claude
	    cat > .claude/settings.json <<'JSON'
	    {
	      "permissions": {
	        "allow": [
	          "mcp__unity",
	          "Edit(reports/**)"
	        ],
	        "deny": [
	          "Bash",
	          "MultiEdit",
	          "WebFetch",
	          "WebSearch",
	          "Task",
	          "TodoWrite",
	          "NotebookEdit",
	          "NotebookRead"
	        ]
	      }
	    }
	    JSON

	# ---------- Reports & helper ----------
	# Clears XML/markdown reports from a previous run and creates the report,
	# snapshot and staging directories.
	- name: Prepare reports and dirs
	  run: |
	    set -eux
	    rm -f reports/*.xml reports/*.md || true
	    mkdir -p reports reports/_snapshots reports/_staging

	# Seeds $JUNIT_OUT with a single failing "NL-Suite.Bootstrap" testcase and
	# $MD_OUT with a heading. The merge step removes the bootstrap case once
	# real fragments are appended.
	- name: Create report skeletons
	  run: |
	    set -eu
	    cat > "$JUNIT_OUT" <<'XML'
	    <?xml version="1.0" encoding="UTF-8"?>
	    <testsuites><testsuite name="UnityMCP.NL-T" tests="1" failures="1" errors="0" skipped="0" time="0">
	    <testcase name="NL-Suite.Bootstrap" classname="UnityMCP.NL-T">
	    <failure message="bootstrap">Bootstrap placeholder; suite will append real tests.</failure>
	    </testcase>
	    </testsuite></testsuites>
	    XML
	    printf '# Unity NL/T Editing Suite Test Results\n\n' > "$MD_OUT"

	# Dumps the bridge status files for the log, extracts "unity_port" from the
	# first one that has it, and, when a port was found, checks that it accepts
	# a TCP connection on 127.0.0.1.
	- name: Verify Unity bridge status/port
	  run: |
	    set -euxo pipefail
	    ls -la "$RUNNER_TEMP/unity-status" || true
	    jq -r . "$RUNNER_TEMP"/unity-status/unity-mcp-status-*.json | sed -n '1,80p' || true

	    # nullglob: an unmatched pattern expands to an empty array, not itself.
	    shopt -s nullglob
	    status_files=("$RUNNER_TEMP"/unity-status/unity-mcp-status-*.json)
	    if ((${#status_files[@]})); then
	      # Whitespace around the colon is optional in both the grep and the sed.
	      port="$(grep -hEo '"unity_port"[[:space:]]*:[[:space:]]*[0-9]+' "${status_files[@]}" \
	        | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/' | head -n1 || true)"
	    else
	      port=""
	    fi

	    echo "unity_port=$port"
	    if [[ -n "$port" ]]; then
	      timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$port" && echo "TCP OK"
	    fi

	# (removed) Revert helper and baseline snapshot are no longer used


	# ---------- Run suite in two passes ----------
	# Pass 1: NL-0..NL-4. Runs only when both the Anthropic key and Unity
	# credentials were detected; a failure here does not fail the job
	# (continue-on-error), the later coverage/backfill steps account for it.
	- name: Run Claude NL pass
	  uses: anthropics/claude-code-base-action@beta
	  if: steps.detect.outputs.anthropic_ok == 'true' && steps.detect.outputs.unity_ok == 'true'
	  continue-on-error: true
	  with:
	    use_node_cache: false
	    prompt_file: .claude/prompts/nl-unity-suite-nl.md
	    mcp_config: .claude/mcp.json
	    settings: .claude/settings.json
	    # File edits are limited to the reports/ tree.
	    allowed_tools: "mcp__unity,Edit(reports/**),MultiEdit(reports/**)"
	    disallowed_tools: "Bash,WebFetch,WebSearch,Task,TodoWrite,NotebookEdit,NotebookRead"
	    model: claude-3-7-sonnet-20250219
	    append_system_prompt: |
	      You are running the NL pass only.
	      - Emit exactly NL-0, NL-1, NL-2, NL-3, NL-4.
	      - Write each to reports/${ID}_results.xml.
	      - Prefer a single MultiEdit(reports/**) batch. Do not emit any T-* tests.
	      - Stop after NL-4_results.xml is written.
	    timeout_minutes: "30"
	    anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}


	# Pass 2: T-A..T-J on the smaller model. Same gating and continue-on-error
	# behaviour as the NL pass; an incomplete result is retried by a later step.
	- name: Run Claude T pass A-J
	  uses: anthropics/claude-code-base-action@beta
	  if: steps.detect.outputs.anthropic_ok == 'true' && steps.detect.outputs.unity_ok == 'true'
	  continue-on-error: true
	  with:
	    use_node_cache: false
	    prompt_file: .claude/prompts/nl-unity-suite-t.md
	    mcp_config: .claude/mcp.json
	    settings: .claude/settings.json
	    # File edits are limited to the reports/ tree.
	    allowed_tools: "mcp__unity,Edit(reports/**),MultiEdit(reports/**)"
	    disallowed_tools: "Bash,WebFetch,WebSearch,Task,TodoWrite,NotebookEdit,NotebookRead"
	    model: claude-3-5-haiku-20241022
	    append_system_prompt: |
	      You are running the T pass (A–J) only.
	      Output requirements:
	      - Emit exactly 10 test fragments: T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J.
	      - Write each fragment to reports/${ID}_results.xml (e.g., T-A_results.xml).
	      - Prefer a single MultiEdit(reports/**) call that writes all ten files in one batch.
	      - If MultiEdit is not used, emit individual writes for any missing IDs until all ten exist.
	      - Do not emit any NL-* fragments.
	      Stop condition:
	      - After T-J_results.xml is written, stop.
	    timeout_minutes: "30"
	    anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}

	# (moved) Assert T coverage after staged fragments are promoted

	# Counts T fragments that are absent or empty in both reports/ and
	# reports/_staging/. Outputs: missing (count) and, when non-zero, list
	# (space-separated IDs). Never fails; it only feeds the retry condition.
	- name: Check T coverage incomplete (pre-retry)
	  id: t_cov
	  if: always()
	  shell: bash
	  run: |
	    set -euo pipefail
	    missing=()
	    for id in T-A T-B T-C T-D T-E T-F T-G T-H T-I T-J; do
	      if [[ ! -s "reports/${id}_results.xml" && ! -s "reports/_staging/${id}_results.xml" ]]; then
	        missing+=("$id")
	      fi
	    done
	    echo "missing=${#missing[@]}" >> "$GITHUB_OUTPUT"
	    if (( ${#missing[@]} )); then
	      echo "list=${missing[*]}" >> "$GITHUB_OUTPUT"
	    fi

	# Re-runs the T pass on the larger model when the coverage check reported
	# missing fragments. It is gated on the same secret checks as the first
	# passes, so it is not attempted without an API key or Unity credentials.
	- name: Retry T pass (Sonnet) if incomplete
	  if: steps.t_cov.outputs.missing != '0' && steps.detect.outputs.anthropic_ok == 'true' && steps.detect.outputs.unity_ok == 'true'
	  uses: anthropics/claude-code-base-action@beta
	  with:
	    use_node_cache: false
	    prompt_file: .claude/prompts/nl-unity-suite-t.md
	    mcp_config: .claude/mcp.json
	    settings: .claude/settings.json
	    # File edits are limited to the reports/ tree.
	    allowed_tools: "mcp__unity,Edit(reports/**),MultiEdit(reports/**)"
	    disallowed_tools: "Bash,MultiEdit(/!(reports/**)),WebFetch,WebSearch,Task,TodoWrite,NotebookEdit,NotebookRead"
	    model: claude-3-7-sonnet-20250219
	    fallback_model: claude-3-5-haiku-20241022
	    append_system_prompt: |
	      You are running the T pass only.
	      Output requirements:
	      - Emit exactly 10 test fragments: T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J.
	      - Write each fragment to reports/${ID}_results.xml (e.g., T-A_results.xml).
	      - Prefer a single MultiEdit(reports/**) call that writes all ten files in one batch.
	      - If MultiEdit is not used, emit individual writes for any missing IDs until all ten exist.
	      - Do not emit any NL-* fragments.
	      Stop condition:
	      - After T-J_results.xml is written, stop.
	    timeout_minutes: "30"
	    anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}

	# Fails the step when any T fragment is absent or empty in reports/.
	# Unlike the pre-retry check, staged copies are not accepted here.
	- name: Re-assert T coverage (post-retry)
	  if: always()
	  shell: bash
	  run: |
	    set -euo pipefail
	    missing=()
	    for id in T-A T-B T-C T-D T-E T-F T-G T-H T-I T-J; do
	      [[ -s "reports/${id}_results.xml" ]] || missing+=("$id")
	    done
	    if (( ${#missing[@]} )); then
	      echo "::error::Still missing T fragments: ${missing[*]}"
	      exit 1
	    fi

	# (kept) Finalize staged report fragments (promote to reports/)

	# (removed duplicate) Finalize staged report fragments

	# Fails the step when a T fragment is absent or empty in both reports/ and
	# reports/_staging/.
	- name: Assert T coverage (after promotion)
	  if: always()
	  shell: bash
	  run: |
	    set -euo pipefail
	    missing=()
	    for id in T-A T-B T-C T-D T-E T-F T-G T-H T-I T-J; do
	      if [[ ! -s "reports/${id}_results.xml" ]]; then
	        # Accept staged fragment as present
	        [[ -s "reports/_staging/${id}_results.xml" ]] || missing+=("$id")
	      fi
	    done
	    if (( ${#missing[@]} )); then
	      echo "::error::Missing T fragments: ${missing[*]}"
	      exit 1
	    fi

	# Rewrites the name attribute of each reports/*_results.xml fragment whose
	# root element is <testcase> to the form "<ID> — <title>". The ID comes
	# from the filename when it encodes one; otherwise it is guessed from the
	# existing name with the RULES table. Unparseable fragments are skipped.
	- name: Canonicalize testcase names (NL/T prefixes)
	  if: always()
	  shell: bash
	  run: |
	    python3 - <<'PY'
	    from pathlib import Path
	    import xml.etree.ElementTree as ET, re, os

	    # (canonical ID, pattern) pairs tried in order against a testcase name.
	    RULES = [
	        ("NL-0", r"\b(NL-0|Baseline|State\s*Capture)\b"),
	        ("NL-1", r"\b(NL-1|Core\s*Method)\b"),
	        ("NL-2", r"\b(NL-2|Anchor|Build\s*marker)\b"),
	        ("NL-3", r"\b(NL-3|End[-\s]*of[-\s]*Class\s*Content|Tail\s*test\s*[ABC])\b"),
	        ("NL-4", r"\b(NL-4|Console|Unity\s*console)\b"),
	        ("T-A", r"\b(T-?A|Temporary\s*Helper)\b"),
	        ("T-B", r"\b(T-?B|Method\s*Body\s*Interior)\b"),
	        ("T-C", r"\b(T-?C|Different\s*Method\s*Interior|ApplyBlend)\b"),
	        ("T-D", r"\b(T-?D|End[-\s]*of[-\s]*Class\s*Helper|TestHelper)\b"),
	        ("T-E", r"\b(T-?E|Method\s*Evolution|Counter|IncrementCounter)\b"),
	        ("T-F", r"\b(T-?F|Atomic\s*Multi[-\s]*Edit)\b"),
	        ("T-G", r"\b(T-?G|Path\s*Normalization)\b"),
	        ("T-H", r"\b(T-?H|Validation\s*on\s*Modified)\b"),
	        ("T-I", r"\b(T-?I|Failure\s*Surface)\b"),
	        ("T-J", r"\b(T-?J|Idempotenc(y|e))\b"),
	    ]

	    def canon_name(name: str) -> str:
	        """Return name normalised via the first matching rule, else unchanged."""
	        n = name or ""
	        for tid, pat in RULES:
	            if re.search(pat, n, flags=re.I):
	                # If it already starts with the correct format, leave it alone
	                if re.match(rf'^\s*{re.escape(tid)}\s*[—–-]', n, flags=re.I):
	                    return n.strip()
	                # If it has a different separator, extract title and reformat
	                title_match = re.search(rf'{re.escape(tid)}\s*[:.\-–—]\s*(.+)', n, flags=re.I)
	                if title_match:
	                    title = title_match.group(1).strip()
	                    return f"{tid} — {title}"
	                # Otherwise, just return the canonical ID
	                return tid
	        return n

	    def id_from_filename(p: Path):
	        """Map NL0/NL-0 and TA/T-A style result filenames to a canonical ID."""
	        n = p.name
	        # The hyphen is optional: the prompts ask for names like
	        # "T-A_results.xml", and the un-hyphenated form is still accepted.
	        m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I)
	        if m:
	            return f"NL-{int(m.group(1))}"
	        m = re.match(r'T-?([A-J])_results\.xml$', n, re.I)
	        if m:
	            return f"T-{m.group(1).upper()}"
	        return None

	    frags = list(sorted(Path("reports").glob("*_results.xml")))
	    for frag in frags:
	        try:
	            tree = ET.parse(frag); root = tree.getroot()
	        except Exception:
	            continue
	        if root.tag != "testcase":
	            continue
	        file_id = id_from_filename(frag)
	        old = root.get("name") or ""
	        # Prefer filename-derived ID; if name doesn't start with it, override
	        if file_id:
	            # Respect file's ID (prevents T-D being renamed to NL-3 by loose patterns)
	            # Strip any leading ID, with or without a separator, so a bare
	            # "T-A" does not become "T-A — T-A".
	            title = re.sub(r'^\s*(NL-\d+|T-[A-Z])\b\s*[—–:\-]?\s*', '', old).strip()
	            new = f"{file_id} — {title}" if title else file_id
	        else:
	            new = canon_name(old)
	        if new != old and new:
	            root.set("name", new)
	            tree.write(frag, encoding="utf-8", xml_declaration=False)
	            print(f'canon: {frag.name}: "{old}" -> "{new}"')

	    # Note: Do not auto-relabel fragments. We rely on per-test strict emission
	    # and the backfill step to surface missing tests explicitly.
	    PY

	# Writes a failing placeholder fragment for every expected test ID that no
	# parseable reports/*_results.xml fragment accounts for, so a missing test
	# appears in the report as a failure instead of disappearing.
	- name: Backfill missing NL/T tests (fail placeholders)
	  if: always()
	  shell: bash
	  run: |
	    python3 - <<'PY'
	    from pathlib import Path
	    import xml.etree.ElementTree as ET
	    import re

	    DESIRED = ["NL-0","NL-1","NL-2","NL-3","NL-4","T-A","T-B","T-C","T-D","T-E","T-F","T-G","T-H","T-I","T-J"]
	    seen = set()

	    def id_from_filename(p: Path):
	        """Map NL0/NL-0 and TA/T-A style result filenames to a canonical ID."""
	        n = p.name
	        # The hyphen is optional. Placeholders are written to
	        # "<ID>_results.xml" below, so an agent-written file of that name
	        # must be recognised here or it would be overwritten.
	        m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I)
	        if m:
	            return f"NL-{int(m.group(1))}"
	        m = re.match(r'T-?([A-J])_results\.xml$', n, re.I)
	        if m:
	            return f"T-{m.group(1).upper()}"
	        return None

	    for p in Path("reports").glob("*_results.xml"):
	        try:
	            r = ET.parse(p).getroot()
	        except Exception:
	            continue
	        # Count by filename id primarily; fall back to testcase name if needed
	        fid = id_from_filename(p)
	        if fid in DESIRED:
	            seen.add(fid)
	            continue
	        if r.tag == "testcase":
	            name = (r.get("name") or "").strip()
	            for d in DESIRED:
	                if name.startswith(d):
	                    seen.add(d)
	                    break

	    Path("reports").mkdir(parents=True, exist_ok=True)
	    for d in DESIRED:
	        if d in seen:
	            continue
	        frag = Path(f"reports/{d}_results.xml")
	        tc = ET.Element("testcase", {"classname":"UnityMCP.NL-T", "name": d})
	        fail = ET.SubElement(tc, "failure", {"message":"not produced"})
	        fail.text = "The agent did not emit a fragment for this test."
	        ET.ElementTree(tc).write(frag, encoding="utf-8", xml_declaration=False)
	        print(f"backfill: {d}")
	    PY

	# Diagnostic only: prints "<file>: <testcase name>" for every fragment whose
	# root is <testcase>, and notes fragments that fail to parse.
	- name: "Debug: list testcase names"
	  if: always()
	  run: |
	    python3 - <<'PY'
	    from pathlib import Path
	    import xml.etree.ElementTree as ET
	    for p in sorted(Path('reports').glob('*_results.xml')):
	        try:
	            r = ET.parse(p).getroot()
	            if r.tag == 'testcase':
	                print(f"{p.name}: {(r.get('name') or '').strip()}")
	        except Exception as exc:
	            # Best-effort listing: report the problem and keep going.
	            print(f"{p.name}: (unparseable: {exc})")
	    PY

	# ---------- Merge testcase fragments into JUnit ----------
	# Appends the first <testcase> of every reports/*_results.xml fragment to
	# the suite in $JUNIT_OUT, prefixing names with the test ID, then removes
	# the bootstrap placeholder and recomputes the suite counters. Exits
	# quietly when the JUnit file or its suite element is missing.
	- name: Normalize/assemble JUnit in-place (single file)
	  if: always()
	  shell: bash
	  run: |
	    python3 - <<'PY'
	    from pathlib import Path
	    import xml.etree.ElementTree as ET
	    import re, os

	    def localname(tag: str) -> str:
	        """Strip an XML namespace ('{uri}tag' -> 'tag')."""
	        return tag.rsplit('}', 1)[-1] if '}' in tag else tag

	    src = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml'))
	    if not src.exists():
	        raise SystemExit(0)

	    tree = ET.parse(src)
	    root = tree.getroot()
	    suite = root.find('./*') if localname(root.tag) == 'testsuites' else root
	    if suite is None:
	        raise SystemExit(0)

	    def id_from_filename(p: Path):
	        """Map NL0/NL-0 and TA/T-A style result filenames to a canonical ID."""
	        n = p.name
	        # The hyphen is optional: the prompts ask for names like
	        # "T-A_results.xml", and the un-hyphenated form is still accepted.
	        m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I)
	        if m:
	            return f"NL-{int(m.group(1))}"
	        m = re.match(r'T-?([A-J])_results\.xml$', n, re.I)
	        if m:
	            return f"T-{m.group(1).upper()}"
	        return None

	    def id_from_system_out(tc):
	        """Return the first NL-n / T-X token found in <system-out>, if any."""
	        so = tc.find('system-out')
	        if so is not None and so.text:
	            m = re.search(r'\b(NL-\d+|T-[A-Z])\b', so.text)
	            if m:
	                return m.group(1)
	        return None

	    fragments = sorted(Path('reports').glob('*_results.xml'))
	    added = 0
	    renamed = 0

	    for frag in fragments:
	        tcs = []
	        try:
	            froot = ET.parse(frag).getroot()
	            if localname(froot.tag) == 'testcase':
	                tcs = [froot]
	            else:
	                tcs = list(froot.findall('.//testcase'))
	        except Exception:
	            txt = Path(frag).read_text(encoding='utf-8', errors='replace')
	            # Extract all testcase nodes from raw text
	            nodes = re.findall(r'<testcase[\s\S]*?</testcase>', txt, flags=re.DOTALL)
	            for m in nodes:
	                try:
	                    tcs.append(ET.fromstring(m))
	                except Exception:
	                    pass

	        # Guard: keep only the first testcase from each fragment
	        if len(tcs) > 1:
	            tcs = tcs[:1]

	        test_id = id_from_filename(frag)

	        for tc in tcs:
	            current_name = tc.get('name') or ''
	            tid = test_id or id_from_system_out(tc)
	            # Enforce filename-derived ID as prefix; repair names if needed
	            if tid and not re.match(r'^\s*(NL-\d+|T-[A-Z])\b', current_name):
	                title = current_name.strip()
	                new_name = f'{tid} — {title}' if title else tid
	                tc.set('name', new_name)
	                renamed += 1
	            elif tid and not re.match(rf'^\s*{re.escape(tid)}\b', current_name):
	                # Replace any wrong leading ID with the correct one
	                title = re.sub(r'^\s*(NL-\d+|T-[A-Z])\b\s*[—–:\-]?\s*', '', current_name).strip()
	                new_name = f'{tid} — {title}' if title else tid
	                tc.set('name', new_name)
	                renamed += 1
	            suite.append(tc)
	            added += 1

	    if added:
	        # Drop bootstrap placeholder and recompute counts
	        for tc in list(suite.findall('.//testcase')):
	            if (tc.get('name') or '') == 'NL-Suite.Bootstrap':
	                suite.remove(tc)
	        testcases = suite.findall('.//testcase')
	        # <error> children are counted as failures; "errors" is reported as 0.
	        failures_cnt = sum(1 for tc in testcases if (tc.find('failure') is not None or tc.find('error') is not None))
	        suite.set('tests', str(len(testcases)))
	        suite.set('failures', str(failures_cnt))
	        suite.set('errors', '0')
	        suite.set('skipped', '0')
	        tree.write(src, encoding='utf-8', xml_declaration=True)
	        print(f"Appended {added} testcase(s); renamed {renamed} to canonical NL/T names.")
	    PY

	# ---------- Markdown summary from JUnit ----------
	# Renders $JUNIT_OUT to $MD_OUT: totals, a checklist over the expected test
	# IDs, then per-test details (system-out truncated to MAX_CHARS, plus the
	# failure message/detail). Testcases without a recognisable ID are listed
	# at the end as unmapped.
	- name: Build markdown summary from JUnit
	  if: always()
	  shell: bash
	  run: |
	    python3 - <<'PY'
	    import xml.etree.ElementTree as ET
	    from pathlib import Path
	    import os, html, re

	    def localname(tag: str) -> str:
	        """Strip an XML namespace ('{uri}tag' -> 'tag')."""
	        return tag.rsplit('}', 1)[-1] if '}' in tag else tag

	    src = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml'))
	    md_out = Path(os.environ.get('MD_OUT', 'reports/junit-nl-suite.md'))
	    md_out.parent.mkdir(parents=True, exist_ok=True)

	    if not src.exists():
	        md_out.write_text("# Unity NL/T Editing Suite Test Results\n\n(No JUnit found)\n", encoding='utf-8')
	        raise SystemExit(0)

	    tree = ET.parse(src)
	    root = tree.getroot()
	    suite = root.find('./*') if localname(root.tag) == 'testsuites' else root
	    cases = [] if suite is None else list(suite.findall('.//testcase'))

	    def id_from_case(tc):
	        """Return the test ID from the name prefix, else from <system-out>."""
	        n = (tc.get('name') or '')
	        m = re.match(r'\s*(NL-\d+|T-[A-Z])\b', n)
	        if m:
	            return m.group(1)
	        so = tc.find('system-out')
	        if so is not None and so.text:
	            m = re.search(r'\b(NL-\d+|T-[A-Z])\b', so.text)
	            if m:
	                return m.group(1)
	        return None

	    # First testcase seen for an ID wins.
	    id_status = {}
	    name_map = {}
	    for tc in cases:
	        tid = id_from_case(tc)
	        ok = (tc.find('failure') is None and tc.find('error') is None)
	        if tid and tid not in id_status:
	            id_status[tid] = ok
	            name_map[tid] = (tc.get('name') or tid)

	    desired = ['NL-0','NL-1','NL-2','NL-3','NL-4','T-A','T-B','T-C','T-D','T-E','T-F','T-G','T-H','T-I','T-J']

	    total = len(cases)
	    failures = sum(1 for tc in cases if (tc.find('failure') is not None or tc.find('error') is not None))
	    passed = total - failures

	    lines = []
	    lines += [
	        '# Unity NL/T Editing Suite Test Results',
	        '',
	        f'Totals: {passed} passed, {failures} failed, {total} total',
	        '',
	        '## Test Checklist'
	    ]
	    for p in desired:
	        st = id_status.get(p, None)
	        lines.append(f"- [x] {p}" if st is True else (f"- [ ] {p} (fail)" if st is False else f"- [ ] {p} (not run)"))
	    lines.append('')

	    lines.append('## Test Details')

	    def order_key(n: str):
	        """Sort NL-* numerically first, then T-* by letter, then the rest."""
	        if n.startswith('NL-'):
	            try:
	                return (0, int(n.split('-')[1]))
	            except ValueError:
	                return (0, 999)
	        if n.startswith('T-') and len(n) > 2:
	            return (1, ord(n[2]))
	        return (2, n)

	    MAX_CHARS = 2000
	    seen = set()
	    for tid in sorted(id_status.keys(), key=order_key):
	        seen.add(tid)
	        tc = next((c for c in cases if (id_from_case(c) == tid)), None)
	        # Compare with None explicitly: an Element without children is
	        # falsy, so `not tc` would skip a passing case that has no children.
	        if tc is None:
	            continue
	        title = name_map.get(tid, tid)
	        status_badge = "PASS" if id_status[tid] else "FAIL"
	        lines.append(f"### {title} — {status_badge}")
	        so = tc.find('system-out')
	        text = '' if so is None or so.text is None else html.unescape(so.text.replace('\r\n','\n'))
	        if text.strip():
	            t = text.strip()
	            if len(t) > MAX_CHARS:
	                t = t[:MAX_CHARS] + "\n…(truncated)"
	            # Use a longer fence when the text itself contains a ``` fence.
	            fence = '```' if '```' not in t else '````'
	            lines += [fence, t, fence]
	        else:
	            lines.append('(no system-out)')
	        # Same truthiness pitfall: `find('failure') or find('error')` drops a
	        # <failure> that has text but no child elements.
	        node = tc.find('failure')
	        if node is None:
	            node = tc.find('error')
	        if node is not None:
	            msg = (node.get('message') or '').strip()
	            body = (node.text or '').strip()
	            if msg:
	                lines.append(f"- Message: {msg}")
	            if body:
	                lines.append(f"- Detail: {body.splitlines()[0][:500]}")
	        lines.append('')

	    for tc in cases:
	        if id_from_case(tc) in seen:
	            continue
	        title = tc.get('name') or '(unnamed)'
	        status_badge = "PASS" if (tc.find('failure') is None and tc.find('error') is None) else "FAIL"
	        lines.append(f"### {title} — {status_badge}")
	        lines.append('(unmapped test id)')
	        lines.append('')

	    md_out.write_text('\n'.join(lines), encoding='utf-8')
	    PY

	# Diagnostic only: lists reports/ and prints the first 40 lines of each XML.
	- name: "Debug: list report files"
	  if: always()
	  shell: bash
	  run: |
	    set -eux
	    ls -la reports || true
	    # nullglob: with no XML files the loop body is simply skipped.
	    shopt -s nullglob
	    for f in reports/*.xml; do
	      echo "===== $f ====="
	      head -n 40 "$f" || true
	    done

	# ---------- Collect execution transcript (if present) ----------
	# Copies claude-execution-output.json into reports/ from $RUNNER_TEMP, or
	# from the hard-coded runner temp path as a fallback; does nothing when
	# neither file exists.
	- name: Collect action execution transcript
	  if: always()
	  shell: bash
	  run: |
	    set -eux
	    if [ -f "$RUNNER_TEMP/claude-execution-output.json" ]; then
	      cp "$RUNNER_TEMP/claude-execution-output.json" reports/claude-execution-output.json
	    elif [ -f "/home/runner/work/_temp/claude-execution-output.json" ]; then
	      cp "/home/runner/work/_temp/claude-execution-output.json" reports/claude-execution-output.json
	    fi

	# Rewrites every reports/*.md in place: NUL bytes removed, invalid UTF-8
	# replaced, CRLF converted to LF.
	- name: Sanitize markdown (normalize newlines)
	  if: always()
	  run: |
	    set -eu
	    python3 - <<'PY'
	    from pathlib import Path
	    rp=Path('reports'); rp.mkdir(parents=True, exist_ok=True)
	    for p in rp.glob('*.md'):
	        b=p.read_bytes().replace(b'\x00', b'')
	        s=b.decode('utf-8','replace').replace('\r\n','\n')
	        p.write_text(s, encoding='utf-8', newline='\n')
	    PY

	# Appends the markdown report to the job summary, truncated to MAX
	# characters, with a pointer to the uploaded artifacts when it was cut.
	- name: NL/T details -> Job Summary
	  if: always()
	  run: |
	    echo "## Unity NL/T Editing Suite — Summary" >> "$GITHUB_STEP_SUMMARY"
	    python3 - <<'PY' >> "$GITHUB_STEP_SUMMARY"
	    from pathlib import Path
	    p = Path('reports/junit-nl-suite.md')
	    if p.exists():
	        text = p.read_bytes().decode('utf-8', 'replace')
	        MAX = 65000
	        print(text[:MAX])
	        if len(text) > MAX:
	            print("\n\n_…truncated; full report in artifacts._")
	    else:
	        print("_No markdown report found._")
	    PY

	# Writes a one-test failing JUnit file when $JUNIT_OUT does not exist, so
	# the publishing step always has a file to read.
	- name: Fallback JUnit if missing
	  if: always()
	  run: |
	    set -eu
	    mkdir -p reports
	    if [ ! -f "$JUNIT_OUT" ]; then
	      printf '%s\n' \
	        '<?xml version="1.0" encoding="UTF-8"?>' \
	        '<testsuite name="UnityMCP.NL-T" tests="1" failures="1" time="0">' \
	        ' <testcase classname="UnityMCP.NL-T" name="NL-Suite.Execution" time="0.0">' \
	        ' <failure><![CDATA[No JUnit was produced by the NL suite step. See the step logs.]]></failure>' \
	        ' </testcase>' \
	        '</testsuite>' \
	        > "$JUNIT_OUT"
	    fi

	# Publishes $JUNIT_OUT with the JUnit report action, including passed tests.
	# A run with no tests is tolerated; an unparseable report fails the step.
	- name: Publish JUnit report
	  if: always()
	  uses: mikepenz/action-junit-report@v5
	  with:
	    report_paths: '${{ env.JUNIT_OUT }}'
	    include_passed: true
	    detailed_summary: true
	    annotate_notice: true
	    require_tests: false
	    fail_on_parse_error: true

	# Uploads the JUnit file, the markdown summary, the per-test fragments and
	# the execution transcript as one artifact kept for 7 days.
	- name: Upload artifacts (reports + fragments + transcript)
	  if: always()
	  uses: actions/upload-artifact@v4
	  with:
	    name: claude-nl-suite-artifacts
	    path: |
	      ${{ env.JUNIT_OUT }}
	      ${{ env.MD_OUT }}
	      reports/*_results.xml
	      reports/claude-execution-output.json
	    retention-days: 7

	# ---------- Always stop Unity ----------
	# Prints the last 400 log lines with credential-looking tokens masked, then
	# force-removes the container. Both commands tolerate a missing container.
	- name: Stop Unity
	  if: always()
	  run: |
	    docker logs --tail 400 unity-mcp | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' || true
	    docker rm -f unity-mcp || true

	# Runs only when email/password activation was used and a serial is
	# configured. A failure here is ignored so it cannot fail the job.
	- name: Return Pro license (if used)
	  if: always() && steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true'
	  uses: game-ci/unity-return-license@v2
	  continue-on-error: true
	  env:
	    UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
	    UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
	    UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Claude NL/T Full Suite (Unity live) #7

Workflow file

Claude NL/T Full Suite (Unity live) #7

Uh oh!

Jobs

Run details

Workflow file for this run