From e6d6c98069f44989d831e9e5eea98287df26d658 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 15:02:52 -0700 Subject: [PATCH 01/28] CI: streamline Unity licensing (ULF/EBL); drop cache mounts & EBL-in-container; NL suite: clarify T-E/T-J, anchor positions, EOF brace targeting, SHA preconditions --- .../prompts/nl-unity-suite-full-additive.md | 20 ++- .github/workflows/claude-nl-suite.yml | 169 ++++++++++-------- 2 files changed, 103 insertions(+), 86 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index f4c65fe6..eabeebac 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -32,6 +32,7 @@ CI provides: ## Tool Mapping - **Anchors/regex/structured**: `mcp__unity__script_apply_edits` - Allowed ops: `anchor_insert`, `replace_method`, `insert_method`, `delete_method`, `regex_replace` + - For `anchor_insert`, always set `"position": "before"` or `"after"`. - **Precise ranges / atomic batch**: `mcp__unity__apply_text_edits` (non‑overlapping ranges) - **Hash-only**: `mcp__unity__get_sha` — returns `{sha256,lengthBytes,lastModifiedUtc}` without file body - **Validation**: `mcp__unity__validate_script(level:"standard")` @@ -49,7 +50,8 @@ CI provides: 5. **Composability**: Tests demonstrate how operations work together in real workflows **State Tracking:** -- Track file SHA after each test to ensure operations succeeded +- Track file SHA after each test (`mcp__unity__get_sha`) and use it as a precondition + for `apply_text_edits` in T‑F/T‑G/T‑I to exercise `stale_file` semantics. - Use content signatures (method names, comment markers) to verify expected state - Validate structural integrity after each major change @@ -85,7 +87,8 @@ CI provides: ### NL-3. 
End-of-Class Content (Additive State C) **Goal**: Demonstrate end-of-class insertions with smart brace matching **Actions**: -- Use anchor pattern to find the class-ending brace (accounts for previous additions) +- Match the final class-closing brace by scanning from EOF (e.g., last `^\s*}\s*$`) + or compute via `find_in_file` + ranges; insert immediately before it. - Insert three comment lines before final class brace: ``` // Tail test A @@ -135,9 +138,9 @@ CI provides: - **Expected final state**: State E + TestHelper() method before class end ### T-E. Method Evolution Lifecycle (Additive State G) -**Goal**: Insert → modify → finalize a method through multiple operations +**Goal**: Insert → modify → finalize a field + companion method **Actions**: -- Insert basic method: `private int Counter = 0;` +- Insert field: `private int Counter = 0;` - Update it: find and replace with `private int Counter = 42; // initialized` - Add companion method: `private void IncrementCounter() { Counter++; }` - **Expected final state**: State F + Counter field + IncrementCounter() method @@ -180,9 +183,12 @@ CI provides: ### T-J. Idempotency on Modified File (Additive State I) **Goal**: Verify operations behave predictably when repeated **Actions**: -- Add unique marker comment: `// idempotency test marker` -- Attempt to add same comment again (should detect no-op) -- Remove marker, attempt removal again (should handle gracefully) +- **Insert (structured)**: `mcp__unity__script_apply_edits` with: + `{"op":"anchor_insert","anchor":"// Tail test C","position":"after","text":"\n // idempotency test marker"}` +- **Insert again** (same op) → expect `no_op: true`. +- **Remove (structured)**: `{"op":"regex_replace","pattern":"(?m)^\\s*// idempotency test marker\\r?\\n?","text":""}` +- **Remove again** (same `regex_replace`) → expect `no_op: true`. 
+- `mcp__unity__validate_script(level:"standard")` - **Expected final state**: State H + verified idempotent behavior --- diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 5bdc573b..674c4f4e 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -14,7 +14,6 @@ concurrency: env: UNITY_VERSION: 2021.3.45f1 UNITY_IMAGE: unityci/editor:ubuntu-2021.3.45f1-linux-il2cpp-3 - UNITY_CACHE_ROOT: /home/runner/work/_temp/_github_home jobs: nl-suite: @@ -70,28 +69,68 @@ jobs: else echo "No MCP Python deps found (skipping)" fi - - # ---------- License prime on host (GameCI) ---------- - - name: Prime Unity license on host (GameCI) - if: steps.detect.outputs.unity_ok == 'true' - uses: game-ci/unity-test-runner@v4 + + # --- Licensing: works with ULF or Email/Password (EBL) --- + - name: Decide license mode + id: lic + shell: bash env: - UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }} - UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} + UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }} + UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} - with: - projectPath: TestProjects/UnityMCPTests - testMode: EditMode - customParameters: -runTests -testFilter __NoSuchTest__ -batchmode -nographics - unityVersion: ${{ env.UNITY_VERSION }} - - # (Optional) Inspect license caches - - name: Inspect GameCI license caches (host) - if: steps.detect.outputs.unity_ok == 'true' run: | - set -eux - find "${{ env.UNITY_CACHE_ROOT }}" -maxdepth 4 \( -path "*/.cache" -prune -o -type f \( -name '*.ulf' -o -name 'user.json' \) -print \) 2>/dev/null || true + set -eu + if [[ -n "${UNITY_LICENSE:-}" ]]; then + echo "mode=ulf" >> "$GITHUB_OUTPUT" + elif [[ -n "${UNITY_EMAIL:-}" && -n "${UNITY_PASSWORD:-}" ]]; then + echo "mode=ebl" >> "$GITHUB_OUTPUT" + else + echo "mode=none" >> "$GITHUB_OUTPUT" + fi + if [[ -n "${UNITY_SERIAL:-}" ]]; then + echo "has_serial=true" >> 
"$GITHUB_OUTPUT" + else + echo "has_serial=false" >> "$GITHUB_OUTPUT" + fi + + - name: Stage Unity .ulf license (from secret) + if: steps.lic.outputs.mode == 'ulf' + env: + UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }} + shell: bash + run: | + set -eu + mkdir -p "$RUNNER_TEMP/unity-license-ulf" + # Accept raw text or base64 ULF + if echo "$UNITY_LICENSE" | base64 -d >/dev/null 2>&1; then + echo "$UNITY_LICENSE" | base64 -d > "$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" + else + printf "%s" "$UNITY_LICENSE" > "$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" + fi + + - name: Activate Unity (EBL on host) + if: steps.lic.outputs.mode == 'ebl' + uses: game-ci/unity-activate@v2 + env: + UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} + UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} + UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} # optional (Pro/Enterprise) + + - name: Stage host license artifacts for container + if: steps.lic.outputs.mode == 'ebl' + shell: bash + run: | + set -euxo pipefail + mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-license-xml" + # Serial-based license, if produced + ULF="$HOME/.local/share/unity3d/Unity/Unity_lic.ulf" + [[ -f "$ULF" ]] && cp "$ULF" "$RUNNER_TEMP/unity-license-ulf/" + # Named-user entitlement license (XML), if produced + XML_DIR="$HOME/.config/unity3d/Unity/licenses" + if [[ -d "$XML_DIR" ]]; then + cp -r "$XML_DIR/." "$RUNNER_TEMP/unity-license-xml/" + fi # ---------- Clean old MCP status ---------- - name: Clean old MCP status @@ -102,80 +141,52 @@ jobs: # ---------- Start headless Unity (persistent bridge) ---------- - name: Start Unity (persistent bridge) - if: steps.detect.outputs.unity_ok == 'true' + if: steps.lic.outputs.mode != 'none' + shell: bash env: - UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} - UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} - UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} + UNITY_IMAGE: ${{ env.UNITY_IMAGE }} run: | - set -eu - if [ ! 
-d "${{ github.workspace }}/TestProjects/UnityMCPTests/ProjectSettings" ]; then - echo "Unity project not found; failing fast." - exit 1 - fi - mkdir -p "$HOME/.unity-mcp" - MANUAL_ARG=() - if [ -f "${UNITY_CACHE_ROOT}/.local/share/unity3d/Unity_lic.ulf" ]; then - MANUAL_ARG=(-manualLicenseFile /root/.local/share/unity3d/Unity_lic.ulf) + set -euxo pipefail + + MANUAL_ARG="" + if [[ -f "$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" ]]; then + MANUAL_ARG="-manualLicenseFile /root/.local/share/unity3d/Unity/Unity_lic.ulf" fi - EBL_ARGS=() - [ -n "${UNITY_SERIAL:-}" ] && EBL_ARGS+=(-serial "$UNITY_SERIAL") - [ -n "${UNITY_EMAIL:-}" ] && EBL_ARGS+=(-username "$UNITY_EMAIL") - [ -n "${UNITY_PASSWORD:-}" ] && EBL_ARGS+=(-password "$UNITY_PASSWORD") + docker rm -f unity-mcp >/dev/null 2>&1 || true docker run -d --name unity-mcp --network host \ -e HOME=/root \ - -e UNITY_MCP_ALLOW_BATCH=1 -e UNITY_MCP_STATUS_DIR=/root/.unity-mcp \ + -e UNITY_MCP_ALLOW_BATCH=1 \ + -e UNITY_MCP_STATUS_DIR=/root/.unity-mcp \ -e UNITY_MCP_BIND_HOST=127.0.0.1 \ -v "${{ github.workspace }}:/workspace" -w /workspace \ - -v "${{ env.UNITY_CACHE_ROOT }}:/root" \ - -v "$HOME/.unity-mcp:/root/.unity-mcp" \ - ${{ env.UNITY_IMAGE }} /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ + -v "$RUNNER_TEMP/unity-license-ulf:/root/.local/share/unity3d/Unity:ro" \ + -v "$RUNNER_TEMP/unity-license-xml:/root/.config/unity3d/Unity/licenses:ro" \ + "$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ -stackTraceLogType Full \ -projectPath /workspace/TestProjects/UnityMCPTests \ - "${MANUAL_ARG[@]}" \ - "${EBL_ARGS[@]}" \ + ${MANUAL_ARG} \ -executeMethod MCPForUnity.Editor.MCPForUnityBridge.StartAutoConnect # ---------- Wait for Unity bridge ---------- - - name: Wait for Unity bridge (robust) - if: steps.detect.outputs.unity_ok == 'true' + - name: Wait for Unity bridge (fast fail if licensing breaks) + shell: bash run: | - set -euo pipefail - if ! 
docker ps --format '{{.Names}}' | grep -qx 'unity-mcp'; then - echo "Unity container failed to start"; docker ps -a || true; exit 1 - fi - docker logs -f unity-mcp 2>&1 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' & LOGPID=$! - deadline=$((SECONDS+420)); READY=0 - try_connect_host() { - P="$1" - timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$P; head -c 8 <&3 >/dev/null" && return 0 || true - if command -v nc >/dev/null 2>&1; then nc -6 -z ::1 "$P" && return 0 || true; fi - return 1 - } - while [ $SECONDS -lt $deadline ]; do - if docker logs unity-mcp 2>&1 | grep -qE "MCP Bridge listening|Bridge ready|Server started"; then - READY=1; echo "Bridge ready (log markers)"; break - fi - PORT=$(python3 -c "import os,glob,json,sys,time; b=os.path.expanduser('~/.unity-mcp'); fs=sorted(glob.glob(os.path.join(b,'unity-mcp-status-*.json')), key=os.path.getmtime, reverse=True); print(next((json.load(open(f,'r',encoding='utf-8')).get('unity_port') for f in fs if time.time()-os.path.getmtime(f)<=300 and json.load(open(f,'r',encoding='utf-8')).get('unity_port')), '' ))" 2>/dev/null || true) - if [ -n "${PORT:-}" ] && { try_connect_host "$PORT" || docker exec unity-mcp bash -lc "timeout 1 bash -lc 'exec 3<>/dev/tcp/127.0.0.1/$PORT' || (command -v nc >/dev/null 2>&1 && nc -6 -z ::1 $PORT)"; }; then - READY=1; echo "Bridge ready on port $PORT"; break - fi - if docker logs unity-mcp 2>&1 | grep -qE "No valid Unity Editor license|Token not found in cache|com\.unity\.editor\.headless"; then - echo "Licensing error detected"; break - fi - sleep 2 - done - kill $LOGPID || true - if [ "$READY" != "1" ]; then - echo "Bridge not ready; diagnostics:" - echo "== status files =="; ls -la "$HOME/.unity-mcp" || true - echo "== status contents =="; for f in "$HOME"/.unity-mcp/unity-mcp-status-*.json; do [ -f "$f" ] && { echo "--- $f"; sed -n '1,120p' "$f"; }; done - echo "== sockets (inside container) =="; docker exec unity-mcp bash -lc 'ss -lntp || netstat -tulpen || 
true' - echo "== tail of Unity log ==" - docker logs --tail 200 unity-mcp | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true - exit 1 - fi + set -eu + timeout 90s bash -c ' + until docker logs unity-mcp 2>&1 | egrep -q "Entitlement|License|listening|ready"; do + sleep 2 + done + ' + docker logs unity-mcp --tail 200 + + - name: Return Pro license (if used) + if: always() && steps.lic.outputs.mode == 'ebl' && steps.lic.outputs.has_serial == 'true' + uses: game-ci/unity-return-license@v2 + env: + UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} + UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} + UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} # ---------- MCP client config ---------- - name: Write MCP config (.claude/mcp.json) From 6e2a1b3a25f6006efcf859cf79bdfda2a5c356c4 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 15:36:18 -0700 Subject: [PATCH 02/28] CI: support both ULF + EBL; validate ULF before -manualLicenseFile; robust readiness wait; use game-ci @v2 actions --- .github/workflows/claude-nl-suite.yml | 78 +++++++++++++++------------ 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 674c4f4e..c45d067d 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -70,66 +70,70 @@ jobs: echo "No MCP Python deps found (skipping)" fi - # --- Licensing: works with ULF or Email/Password (EBL) --- - - name: Decide license mode + # --- Licensing: allow both ULF and EBL when available --- + - name: Decide license sources id: lic shell: bash env: - UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }} - UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} + UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }} + UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} run: | set -eu - if [[ -n "${UNITY_LICENSE:-}" ]]; then - echo "mode=ulf" >> "$GITHUB_OUTPUT" - elif [[ 
-n "${UNITY_EMAIL:-}" && -n "${UNITY_PASSWORD:-}" ]]; then - echo "mode=ebl" >> "$GITHUB_OUTPUT" - else - echo "mode=none" >> "$GITHUB_OUTPUT" - fi - if [[ -n "${UNITY_SERIAL:-}" ]]; then - echo "has_serial=true" >> "$GITHUB_OUTPUT" - else - echo "has_serial=false" >> "$GITHUB_OUTPUT" - fi + use_ulf=false; use_ebl=false + [[ -n "${UNITY_LICENSE:-}" ]] && use_ulf=true + [[ -n "${UNITY_EMAIL:-}" && -n "${UNITY_PASSWORD:-}" ]] && use_ebl=true + echo "use_ulf=$use_ulf" >> "$GITHUB_OUTPUT" + echo "use_ebl=$use_ebl" >> "$GITHUB_OUTPUT" + echo "has_serial=$([[ -n \"${UNITY_SERIAL:-}\" ]] && echo true || echo false)" >> "$GITHUB_OUTPUT" - name: Stage Unity .ulf license (from secret) - if: steps.lic.outputs.mode == 'ulf' + if: steps.lic.outputs.use_ulf == 'true' + id: ulf env: UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }} shell: bash run: | set -eu - mkdir -p "$RUNNER_TEMP/unity-license-ulf" - # Accept raw text or base64 ULF + mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-license-xml" + f="$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" if echo "$UNITY_LICENSE" | base64 -d >/dev/null 2>&1; then - echo "$UNITY_LICENSE" | base64 -d > "$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" + echo "$UNITY_LICENSE" | base64 -d > "$f" + else + printf "%s" "$UNITY_LICENSE" > "$f" + fi + # If someone pasted an entitlement XML into UNITY_LICENSE by mistake, re-home it: + if head -c 100 "$f" | grep -qi '<\?xml'; then + mv "$f" "$RUNNER_TEMP/unity-license-xml/UnityEntitlementLicense.xml" + echo "ok=false" >> "$GITHUB_OUTPUT" + elif grep -qi '' "$f"; then + echo "ok=true" >> "$GITHUB_OUTPUT" else - printf "%s" "$UNITY_LICENSE" > "$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" + echo "ok=false" >> "$GITHUB_OUTPUT" fi - name: Activate Unity (EBL on host) - if: steps.lic.outputs.mode == 'ebl' + if: steps.lic.outputs.use_ebl == 'true' uses: game-ci/unity-activate@v2 + with: + unityVersion: ${{ env.UNITY_VERSION }} env: UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} UNITY_PASSWORD: ${{ 
secrets.UNITY_PASSWORD }} - UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} # optional (Pro/Enterprise) + UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} # optional for Pro - name: Stage host license artifacts for container - if: steps.lic.outputs.mode == 'ebl' + if: steps.lic.outputs.use_ebl == 'true' shell: bash run: | set -euxo pipefail mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-license-xml" - # Serial-based license, if produced ULF="$HOME/.local/share/unity3d/Unity/Unity_lic.ulf" - [[ -f "$ULF" ]] && cp "$ULF" "$RUNNER_TEMP/unity-license-ulf/" - # Named-user entitlement license (XML), if produced + [[ -f "$ULF" ]] && cp -f "$ULF" "$RUNNER_TEMP/unity-license-ulf/" XML_DIR="$HOME/.config/unity3d/Unity/licenses" if [[ -d "$XML_DIR" ]]; then - cp -r "$XML_DIR/." "$RUNNER_TEMP/unity-license-xml/" + cp -rf "$XML_DIR/." "$RUNNER_TEMP/unity-license-xml/" fi # ---------- Clean old MCP status ---------- @@ -141,15 +145,14 @@ jobs: # ---------- Start headless Unity (persistent bridge) ---------- - name: Start Unity (persistent bridge) - if: steps.lic.outputs.mode != 'none' + if: steps.lic.outputs.use_ulf == 'true' || steps.lic.outputs.use_ebl == 'true' shell: bash env: UNITY_IMAGE: ${{ env.UNITY_IMAGE }} run: | set -euxo pipefail - MANUAL_ARG="" - if [[ -f "$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" ]]; then + if [[ "${{ steps.ulf.outputs.ok || 'false' }}" == "true" ]]; then MANUAL_ARG="-manualLicenseFile /root/.local/share/unity3d/Unity/Unity_lic.ulf" fi @@ -169,19 +172,24 @@ jobs: -executeMethod MCPForUnity.Editor.MCPForUnityBridge.StartAutoConnect # ---------- Wait for Unity bridge ---------- - - name: Wait for Unity bridge (fast fail if licensing breaks) + - name: Wait for Unity bridge (real readiness & fast fail) shell: bash run: | set -eu - timeout 90s bash -c ' - until docker logs unity-mcp 2>&1 | egrep -q "Entitlement|License|listening|ready"; do + timeout 300s bash -lc ' + ok_pat="(MCPForUnity|MCP For Unity|AutoConnect).*listening|ready" + err_pat="No 
valid Unity|cannot load ULF|Token not found|0 entitlement|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error" + while :; do + l=$(docker logs unity-mcp 2>&1 || true) + echo "$l" | grep -qiE "$ok_pat" && exit 0 + echo "$l" | grep -qiE "$err_pat" && { echo "$l" | tail -n 200; exit 1; } sleep 2 done ' docker logs unity-mcp --tail 200 - name: Return Pro license (if used) - if: always() && steps.lic.outputs.mode == 'ebl' && steps.lic.outputs.has_serial == 'true' + if: always() && steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true' uses: game-ci/unity-return-license@v2 env: UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} From 3a33f75c2f8b38ca1c6cac42da809cf1e02cc278 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 15:50:50 -0700 Subject: [PATCH 03/28] CI: activate EBL via container using UNITY_IMAGE; fix readiness regex grouping --- .github/workflows/claude-nl-suite.yml | 33 ++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index c45d067d..11ed42a8 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -113,15 +113,36 @@ jobs: echo "ok=false" >> "$GITHUB_OUTPUT" fi - - name: Activate Unity (EBL on host) + # --- Activate via EBL inside the same Unity image (writes host-side entitlement) --- + - name: Activate Unity (EBL via container — host-mount) if: steps.lic.outputs.use_ebl == 'true' - uses: game-ci/unity-activate@v2 - with: - unityVersion: ${{ env.UNITY_VERSION }} + shell: bash env: + UNITY_IMAGE: ${{ env.UNITY_IMAGE }} UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} - UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} # optional for Pro + UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} + run: | + set -euxo pipefail + # host dir to receive the entitlement XML + mkdir -p "$RUNNER_TEMP/unity-license-xml" + + # Try Pro first if serial is 
present, otherwise named-user EBL. + docker run --rm --network host \ + -e HOME=/root \ + -e UNITY_EMAIL -e UNITY_PASSWORD -e UNITY_SERIAL \ + -v "$RUNNER_TEMP/unity-license-xml:/root/.config/unity3d/Unity/licenses" \ + "$UNITY_IMAGE" bash -lc ' + set -euxo pipefail + if [[ -n "${UNITY_SERIAL:-}" ]]; then + /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ + -username "$UNITY_EMAIL" -password "$UNITY_PASSWORD" -serial "$UNITY_SERIAL" -quit || true + else + /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ + -username "$UNITY_EMAIL" -password "$UNITY_PASSWORD" -quit || true + fi + ls -la /root/.config/unity3d/Unity/licenses || true + ' - name: Stage host license artifacts for container if: steps.lic.outputs.use_ebl == 'true' @@ -177,7 +198,7 @@ jobs: run: | set -eu timeout 300s bash -lc ' - ok_pat="(MCPForUnity|MCP For Unity|AutoConnect).*listening|ready" + ok_pat="(MCPForUnity|MCP For Unity|AutoConnect).*(listening|ready)" err_pat="No valid Unity|cannot load ULF|Token not found|0 entitlement|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error" while :; do l=$(docker logs unity-mcp 2>&1 || true) From f9141e4086e786a22f717d05df338c92c9ba6a7b Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 15:53:26 -0700 Subject: [PATCH 04/28] =?UTF-8?q?CI:=20minimal=20patch=20=E2=80=94=20guard?= =?UTF-8?q?=20manualLicenseFile=20by=20ulf.ok,=20expand=20error=20patterns?= =?UTF-8?q?,=20keep=20return-license=20@v2=20for=20linter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/claude-nl-suite.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 11ed42a8..f2702ad8 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -173,7 +173,7 @@ jobs: run: | set -euxo pipefail MANUAL_ARG="" - if [[ "${{ steps.ulf.outputs.ok || 'false' }}" 
== "true" ]]; then + if [[ "${{ steps.ulf.outputs.ok }}" == "true" ]]; then MANUAL_ARG="-manualLicenseFile /root/.local/share/unity3d/Unity/Unity_lic.ulf" fi @@ -199,7 +199,7 @@ jobs: set -eu timeout 300s bash -lc ' ok_pat="(MCPForUnity|MCP For Unity|AutoConnect).*(listening|ready)" - err_pat="No valid Unity|cannot load ULF|Token not found|0 entitlement|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error" + err_pat="No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error" while :; do l=$(docker logs unity-mcp 2>&1 || true) echo "$l" | grep -qiE "$ok_pat" && exit 0 From 6a253eded3760578e99a859039a24d42fdf6da6e Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 15:57:50 -0700 Subject: [PATCH 05/28] CI: harden ULF staging (printf+chmod); pass ULF_OK via env; use manual_args array for -manualLicenseFile --- .github/workflows/claude-nl-suite.yml | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index f2702ad8..08a09f6d 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -98,11 +98,12 @@ jobs: set -eu mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-license-xml" f="$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" - if echo "$UNITY_LICENSE" | base64 -d >/dev/null 2>&1; then - echo "$UNITY_LICENSE" | base64 -d > "$f" + if printf "%s" "$UNITY_LICENSE" | base64 -d - >/dev/null 2>&1; then + printf "%s" "$UNITY_LICENSE" | base64 -d - > "$f" else printf "%s" "$UNITY_LICENSE" > "$f" fi + chmod 600 "$f" || true # If someone pasted an entitlement XML into UNITY_LICENSE by mistake, re-home it: if head -c 100 "$f" | grep -qi '<\?xml'; then mv "$f" "$RUNNER_TEMP/unity-license-xml/UnityEntitlementLicense.xml" @@ -170,11 +171,12 @@ jobs: shell: bash env: 
UNITY_IMAGE: ${{ env.UNITY_IMAGE }} + ULF_OK: ${{ steps.ulf.outputs.ok }} run: | set -euxo pipefail - MANUAL_ARG="" - if [[ "${{ steps.ulf.outputs.ok }}" == "true" ]]; then - MANUAL_ARG="-manualLicenseFile /root/.local/share/unity3d/Unity/Unity_lic.ulf" + manual_args=() + if [[ "${ULF_OK:-false}" == "true" ]]; then + manual_args=(-manualLicenseFile "/root/.local/share/unity3d/Unity/Unity_lic.ulf") fi docker rm -f unity-mcp >/dev/null 2>&1 || true @@ -189,7 +191,7 @@ jobs: "$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ -stackTraceLogType Full \ -projectPath /workspace/TestProjects/UnityMCPTests \ - ${MANUAL_ARG} \ + "${manual_args[@]}" \ -executeMethod MCPForUnity.Editor.MCPForUnityBridge.StartAutoConnect # ---------- Wait for Unity bridge ---------- @@ -198,16 +200,19 @@ jobs: run: | set -eu timeout 300s bash -lc ' - ok_pat="(MCPForUnity|MCP For Unity|AutoConnect).*(listening|ready)" - err_pat="No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error" + ok_pat="((MCPForUnity|MCP For Unity|AutoConnect).*(listening|ready))" + err_pat="No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License.*(failed|error|expired|denied)|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error" while :; do l=$(docker logs unity-mcp 2>&1 || true) echo "$l" | grep -qiE "$ok_pat" && exit 0 - echo "$l" | grep -qiE "$err_pat" && { echo "$l" | tail -n 200; exit 1; } + echo "$l" | grep -qiE "$err_pat" && { + echo "$l" | tail -n 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig'; + exit 1; + } sleep 2 done ' - docker logs unity-mcp --tail 200 + docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true - name: Return Pro license (if used) if: always() && 
steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true' From e69b4604170e1bb43085802accd515e5281f5397 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 16:09:46 -0700 Subject: [PATCH 06/28] CI: assert EBL activation writes entitlement to host mount; fail fast if missing --- .github/workflows/claude-nl-suite.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 08a09f6d..37fb9afb 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -145,6 +145,12 @@ jobs: ls -la /root/.config/unity3d/Unity/licenses || true ' + # Verify entitlement written to host mount + if ! find "$RUNNER_TEMP/unity-license-xml" -maxdepth 1 -type f \( -iname '*.xml' -o -iname '*.ulf' \) | grep -q .; then + echo "Unity EBL activation did not produce an entitlement on host mount: $RUNNER_TEMP/unity-license-xml" >&2 + exit 1 + fi + - name: Stage host license artifacts for container if: steps.lic.outputs.use_ebl == 'true' shell: bash From cb6c69431a96f3c0ce8282a3b978c01dc1d4a29d Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 16:13:21 -0700 Subject: [PATCH 07/28] CI: use heredoc in wait step to avoid nested-quote issues; remove redundant EBL artifact copy; drop job-level if and unused UNITY_VERSION --- .github/workflows/claude-nl-suite.yml | 42 ++++++++++----------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 37fb9afb..91065f3c 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -12,12 +12,10 @@ concurrency: cancel-in-progress: true env: - UNITY_VERSION: 2021.3.45f1 UNITY_IMAGE: unityci/editor:ubuntu-2021.3.45f1-linux-il2cpp-3 jobs: nl-suite: - if: github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest timeout-minutes: 60 env: @@ -151,18 +149,7 @@ jobs: 
exit 1 fi - - name: Stage host license artifacts for container - if: steps.lic.outputs.use_ebl == 'true' - shell: bash - run: | - set -euxo pipefail - mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-license-xml" - ULF="$HOME/.local/share/unity3d/Unity/Unity_lic.ulf" - [[ -f "$ULF" ]] && cp -f "$ULF" "$RUNNER_TEMP/unity-license-ulf/" - XML_DIR="$HOME/.config/unity3d/Unity/licenses" - if [[ -d "$XML_DIR" ]]; then - cp -rf "$XML_DIR/." "$RUNNER_TEMP/unity-license-xml/" - fi + # EBL entitlement is already written directly to $RUNNER_TEMP/unity-license-xml by the activation step # ---------- Clean old MCP status ---------- - name: Clean old MCP status @@ -205,19 +192,20 @@ jobs: shell: bash run: | set -eu - timeout 300s bash -lc ' - ok_pat="((MCPForUnity|MCP For Unity|AutoConnect).*(listening|ready))" - err_pat="No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License.*(failed|error|expired|denied)|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error" - while :; do - l=$(docker logs unity-mcp 2>&1 || true) - echo "$l" | grep -qiE "$ok_pat" && exit 0 - echo "$l" | grep -qiE "$err_pat" && { - echo "$l" | tail -n 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig'; - exit 1; - } - sleep 2 - done - ' + timeout 300s bash <<'BASH' + set -euo pipefail + ok_pat='((MCPForUnity|MCP For Unity|AutoConnect).*(listening|ready))' + err_pat='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License.*(failed|error|expired|denied)|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error' + while :; do + l="$(docker logs unity-mcp 2>&1 || true)" + if echo "$l" | grep -qiE "$ok_pat"; then exit 0; fi + if echo "$l" | grep -qiE "$err_pat"; then + echo "$l" | tail -n 200 | sed -E 
's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' + exit 1 + fi + sleep 2 + done + BASH docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true - name: Return Pro license (if used) From 124a87247abbaf82d62fc3681e41b6955cbf70b9 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 16:23:05 -0700 Subject: [PATCH 08/28] CI: harden wait step (container status check, broader ready patterns, longer timeout); make license return non-blocking --- .github/workflows/claude-nl-suite.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 91065f3c..47bd1059 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -192,10 +192,17 @@ jobs: shell: bash run: | set -eu - timeout 300s bash <<'BASH' + timeout 600s bash <<'BASH' set -euo pipefail - ok_pat='((MCPForUnity|MCP For Unity|AutoConnect).*(listening|ready))' + ok_pat='(MCP(For)?Unity|AutoConnect|Bridge).*(listening|ready|started|port)' err_pat='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License.*(failed|error|expired|denied)|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error' + # If container already exited, fail fast with logs + st="$(docker inspect -f '{{.State.Status}} {{.State.ExitCode}}' unity-mcp 2>/dev/null || true)" + case "$st" in + exited*|dead*) + docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' + exit 1;; + esac while :; do l="$(docker logs unity-mcp 2>&1 || true)" if echo "$l" | grep -qiE "$ok_pat"; then exit 0; fi @@ -211,6 +218,7 @@ jobs: - name: Return Pro license (if used) if: always() && steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true' uses: game-ci/unity-return-license@v2 
+ continue-on-error: true env: UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} From 6f8695bd5c4f3dc893f1576c8ed640698251cdc0 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 16:43:03 -0700 Subject: [PATCH 09/28] =?UTF-8?q?CI:=20wait=20step=20=E2=80=94=20confirm?= =?UTF-8?q?=20bridge=20readiness=20via=20status=20JSON=20(unity=5Fport)=20?= =?UTF-8?q?+=20host=20socket=20probe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/claude-nl-suite.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 47bd1059..681603d0 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -206,6 +206,16 @@ jobs: while :; do l="$(docker logs unity-mcp 2>&1 || true)" if echo "$l" | grep -qiE "$ok_pat"; then exit 0; fi + # Alternate readiness: parse bridge status JSON for unity_port and verify TCP connect on host + port="$(docker exec unity-mcp bash -lc 'python3 - </dev/null || true)" + if [[ -n "$port" ]]; then + python3 - < Date: Fri, 5 Sep 2025 16:51:14 -0700 Subject: [PATCH 10/28] CI: YAML-safe readiness fallback (grep/sed unity_port + bash TCP probe); workflow_dispatch trigger + ASCII step names --- .github/workflows/claude-nl-suite.yml | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 681603d0..a919f86e 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -1,7 +1,6 @@ name: Claude NL/T Full Suite (Unity live) -on: - workflow_dispatch: {} +on: [workflow_dispatch] permissions: contents: read @@ -113,7 +112,7 @@ jobs: fi # --- Activate via EBL inside the same Unity image (writes host-side entitlement) --- - - name: Activate Unity (EBL via container — host-mount) + - name: 
Activate Unity (EBL via container - host-mount) if: steps.lic.outputs.use_ebl == 'true' shell: bash env: @@ -206,15 +205,10 @@ jobs: while :; do l="$(docker logs unity-mcp 2>&1 || true)" if echo "$l" | grep -qiE "$ok_pat"; then exit 0; fi - # Alternate readiness: parse bridge status JSON for unity_port and verify TCP connect on host - port="$(docker exec unity-mcp bash -lc 'python3 - </dev/null || true)" - if [[ -n "$port" ]]; then - python3 - </dev/null || true)" + if [[ -n "$port" ]] && timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$port"; then + exit 0 fi if echo "$l" | grep -qiE "$err_pat"; then echo "$l" | tail -n 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' @@ -535,7 +529,7 @@ PY p.write_text(s, encoding='utf-8', newline='\n') PY - - name: NL/T details → Job Summary + - name: NL/T details -> Job Summary if: always() run: | echo "## Unity NL/T Editing Suite — Summary" >> $GITHUB_STEP_SUMMARY From 2ffcb560c404f0c0c21d0114112d4b375472d0e7 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 16:55:55 -0700 Subject: [PATCH 11/28] CI: refine license error pattern to ignore benign LicensingClient channel startup; only match true activation/return failures --- .github/workflows/claude-nl-suite.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index a919f86e..009d1a41 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -194,7 +194,9 @@ jobs: timeout 600s bash <<'BASH' set -euo pipefail ok_pat='(MCP(For)?Unity|AutoConnect|Bridge).*(listening|ready|started|port)' - err_pat='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License.*(failed|error|expired|denied)|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error' + # Be strict on actual license failures; avoid benign 
startup messages like + # "Licensing::IpcConnector ... failed because channel doesn't exist" before client launch + err_pat='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License (activation|return|renewal).*(failed|error|expired|denied)|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error' # If container already exited, fail fast with logs st="$(docker inspect -f '{{.State.Status}} {{.State.ExitCode}}' unity-mcp 2>/dev/null || true)" case "$st" in From 3e896669e47576888ace85f67c78b3d443c86b51 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 17:30:10 -0700 Subject: [PATCH 12/28] Improve Unity bridge wait logic in CI workflow - Increase timeout from 600s to 900s for Unity startup - Add 'bound' to readiness pattern to catch more bridge signals - Refine error detection to focus only on license failures - Remove non-license error patterns that could cause false failures - Improve error reporting with descriptive messages - Fix regex escaping for unity port parsing - Fix case sensitivity in sed commands --- .github/workflows/claude-nl-suite.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 009d1a41..5748a09d 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -191,35 +191,35 @@ jobs: shell: bash run: | set -eu - timeout 600s bash <<'BASH' + timeout 900s bash <<'BASH' set -euo pipefail - ok_pat='(MCP(For)?Unity|AutoConnect|Bridge).*(listening|ready|started|port)' - # Be strict on actual license failures; avoid benign startup messages like - # "Licensing::IpcConnector ... 
failed because channel doesn't exist" before client launch - err_pat='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License (activation|return|renewal).*(failed|error|expired|denied)|headless.*not found|Batchmode quit|Aborting batchmode|Fatal error' + ok_pat='(MCP(For)?Unity|AutoConnect|Bridge).*(listening|ready|started|port|bound)' + # Only license-fatal signals should abort the wait: + license_err='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License (activation|return|renewal).*(failed|error|expired|denied)' # If container already exited, fail fast with logs st="$(docker inspect -f '{{.State.Status}} {{.State.ExitCode}}' unity-mcp 2>/dev/null || true)" case "$st" in exited*|dead*) - docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' + docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' exit 1;; esac while :; do l="$(docker logs unity-mcp 2>&1 || true)" if echo "$l" | grep -qiE "$ok_pat"; then exit 0; fi # Alternate readiness: parse unity_port from status JSON and verify host TCP connect - port="$(docker exec unity-mcp bash -lc 'shopt -s nullglob; for f in /root/.unity-mcp/unity-mcp-status-*.json; do grep -ho "\"unity_port\"\s*:\s*[0-9]\+" "$f"; done | sed -E "s/.*: *([0-9]+).*/\1/" | head -n1' 2>/dev/null || true)" + port="$(docker exec unity-mcp bash -lc 'shopt -s nullglob; for f in /root/.unity-mcp/unity-mcp-status-*.json; do grep -ho "\"unity_port\"\\s*:\\s*[0-9]\\+" "$f"; done | sed -E "s/.*: *([0-9]+).*/\\1/" | head -n1' 2>/dev/null || true)" if [[ -n "$port" ]] && timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$port"; then exit 0 fi - if echo "$l" | grep -qiE "$err_pat"; then - echo "$l" | tail -n 200 | sed -E 
's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' + if echo "$l" | grep -qiE "$license_err"; then + echo "License failure matched by wait-gate:" >&2 + echo "$l" | tail -n 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' >&2 exit 1 fi sleep 2 done BASH - docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true + docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' || true - name: Return Pro license (if used) if: always() && steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true' From ece0836923d5e4b73c9be7e831bc9eb004f41fe8 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 17:32:52 -0700 Subject: [PATCH 13/28] Add comprehensive Unity workflow improvements - Add project warm-up step to pre-import Library before bridge startup - Expand license mounts to capture full Unity config and local-share directories - Update bridge container to use expanded directory mounts instead of narrow license paths - Provide ULF licenses in both legacy and standard local-share paths - Improve EBL activation to capture complete Unity authentication context - Update verification logic to check full config directories for entitlements These changes eliminate cold import delays during bridge startup and provide Unity with all necessary authentication data, reducing edge cases and improving overall workflow reliability. 
--- .github/workflows/claude-nl-suite.yml | 47 +++++++++++++++++++++------ 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 5748a09d..3d46e107 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -93,7 +93,7 @@ jobs: shell: bash run: | set -eu - mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-license-xml" + mkdir -p "$RUNNER_TEMP/unity-license-ulf" "$RUNNER_TEMP/unity-local/Unity" f="$RUNNER_TEMP/unity-license-ulf/Unity_lic.ulf" if printf "%s" "$UNITY_LICENSE" | base64 -d - >/dev/null 2>&1; then printf "%s" "$UNITY_LICENSE" | base64 -d - > "$f" @@ -103,9 +103,12 @@ jobs: chmod 600 "$f" || true # If someone pasted an entitlement XML into UNITY_LICENSE by mistake, re-home it: if head -c 100 "$f" | grep -qi '<\?xml'; then - mv "$f" "$RUNNER_TEMP/unity-license-xml/UnityEntitlementLicense.xml" + mkdir -p "$RUNNER_TEMP/unity-config/Unity/licenses" + mv "$f" "$RUNNER_TEMP/unity-config/Unity/licenses/UnityEntitlementLicense.xml" echo "ok=false" >> "$GITHUB_OUTPUT" elif grep -qi '' "$f"; then + # provide it in the standard local-share path too + cp -f "$f" "$RUNNER_TEMP/unity-local/Unity/Unity_lic.ulf" echo "ok=true" >> "$GITHUB_OUTPUT" else echo "ok=false" >> "$GITHUB_OUTPUT" @@ -122,14 +125,15 @@ jobs: UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} run: | set -euxo pipefail - # host dir to receive the entitlement XML - mkdir -p "$RUNNER_TEMP/unity-license-xml" + # host dirs to receive the full Unity config and local-share + mkdir -p "$RUNNER_TEMP/unity-config" "$RUNNER_TEMP/unity-local" # Try Pro first if serial is present, otherwise named-user EBL. 
docker run --rm --network host \ -e HOME=/root \ -e UNITY_EMAIL -e UNITY_PASSWORD -e UNITY_SERIAL \ - -v "$RUNNER_TEMP/unity-license-xml:/root/.config/unity3d/Unity/licenses" \ + -v "$RUNNER_TEMP/unity-config:/root/.config/unity3d" \ + -v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d" \ "$UNITY_IMAGE" bash -lc ' set -euxo pipefail if [[ -n "${UNITY_SERIAL:-}" ]]; then @@ -143,12 +147,35 @@ jobs: ' # Verify entitlement written to host mount - if ! find "$RUNNER_TEMP/unity-license-xml" -maxdepth 1 -type f \( -iname '*.xml' -o -iname '*.ulf' \) | grep -q .; then - echo "Unity EBL activation did not produce an entitlement on host mount: $RUNNER_TEMP/unity-license-xml" >&2 + if ! find "$RUNNER_TEMP/unity-config" -type f -iname "*.xml" | grep -q .; then + echo "Unity EBL activation did not produce an entitlement on host mount: $RUNNER_TEMP/unity-config" >&2 exit 1 fi - # EBL entitlement is already written directly to $RUNNER_TEMP/unity-license-xml by the activation step + # EBL entitlement is already written directly to $RUNNER_TEMP/unity-config by the activation step + + # ---------- Warm up project (import Library once) ---------- + - name: Warm up project (import Library once) + if: steps.lic.outputs.use_ulf == 'true' || steps.lic.outputs.use_ebl == 'true' + shell: bash + env: + UNITY_IMAGE: ${{ env.UNITY_IMAGE }} + ULF_OK: ${{ steps.ulf.outputs.ok }} + run: | + set -euxo pipefail + manual_args=() + if [[ "${ULF_OK:-false}" == "true" ]]; then + manual_args=(-manualLicenseFile "/root/.local/share/unity3d/Unity/Unity_lic.ulf") + fi + docker run --rm --network host \ + -e HOME=/root \ + -v "${{ github.workspace }}:/workspace" -w /workspace \ + -v "$RUNNER_TEMP/unity-config:/root/.config/unity3d:ro" \ + -v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d:ro" \ + "$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ + -projectPath /workspace/TestProjects/UnityMCPTests \ + "${manual_args[@]}" \ + -quit # ---------- Clean old MCP status 
---------- - name: Clean old MCP status @@ -178,8 +205,8 @@ jobs: -e UNITY_MCP_STATUS_DIR=/root/.unity-mcp \ -e UNITY_MCP_BIND_HOST=127.0.0.1 \ -v "${{ github.workspace }}:/workspace" -w /workspace \ - -v "$RUNNER_TEMP/unity-license-ulf:/root/.local/share/unity3d/Unity:ro" \ - -v "$RUNNER_TEMP/unity-license-xml:/root/.config/unity3d/Unity/licenses:ro" \ + -v "$RUNNER_TEMP/unity-config:/root/.config/unity3d:ro" \ + -v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d:ro" \ "$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ -stackTraceLogType Full \ -projectPath /workspace/TestProjects/UnityMCPTests \ From eadfa7bb672776ff7cd0045ca77a4dce5e15945c Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 17:36:36 -0700 Subject: [PATCH 14/28] Refine Unity workflow licensing and permissions - Make EBL verification conditional on ULF presence to allow ULF-only runs - Remove read-only mounts from warm-up container for Unity user directories - Align secrets gate with actual licensing requirements (remove UNITY_SERIAL only) - Keep return-license action at v2 (latest available version) These changes prevent workflow failures when EBL has issues but ULF is valid, allow Unity to write preferences during warm-up, and ensure secrets detection matches the actual licensing logic used by the workflow steps. 
--- .github/workflows/claude-nl-suite.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 3d46e107..46b42ae2 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -34,7 +34,7 @@ jobs: run: | set -e if [ -n "$ANTHROPIC_API_KEY" ]; then echo "anthropic_ok=true" >> "$GITHUB_OUTPUT"; else echo "anthropic_ok=false" >> "$GITHUB_OUTPUT"; fi - if [ -n "$UNITY_LICENSE" ] || { [ -n "$UNITY_EMAIL" ] && [ -n "$UNITY_PASSWORD" ]; } || [ -n "$UNITY_SERIAL" ]; then + if [ -n "$UNITY_LICENSE" ] || { [ -n "$UNITY_EMAIL" ] && [ -n "$UNITY_PASSWORD" ]; }; then echo "unity_ok=true" >> "$GITHUB_OUTPUT" else echo "unity_ok=false" >> "$GITHUB_OUTPUT" @@ -146,10 +146,14 @@ jobs: ls -la /root/.config/unity3d/Unity/licenses || true ' - # Verify entitlement written to host mount + # Verify entitlement written to host mount; allow ULF-only runs to proceed if ! find "$RUNNER_TEMP/unity-config" -type f -iname "*.xml" | grep -q .; then - echo "Unity EBL activation did not produce an entitlement on host mount: $RUNNER_TEMP/unity-config" >&2 - exit 1 + if [[ "${{ steps.ulf.outputs.ok }}" == "true" ]]; then + echo "EBL entitlement not found; proceeding with ULF-only (ok=true)." + else + echo "No entitlement produced and no valid ULF; cannot continue." 
>&2 + exit 1 + fi fi # EBL entitlement is already written directly to $RUNNER_TEMP/unity-config by the activation step @@ -170,8 +174,8 @@ jobs: docker run --rm --network host \ -e HOME=/root \ -v "${{ github.workspace }}:/workspace" -w /workspace \ - -v "$RUNNER_TEMP/unity-config:/root/.config/unity3d:ro" \ - -v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d:ro" \ + -v "$RUNNER_TEMP/unity-config:/root/.config/unity3d" \ + -v "$RUNNER_TEMP/unity-local:/root/.local/share/unity3d" \ "$UNITY_IMAGE" /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ -projectPath /workspace/TestProjects/UnityMCPTests \ "${manual_args[@]}" \ From 5a11cbfcde29ff7d60b9a120acb0acb68f6b8441 Mon Sep 17 00:00:00 2001 From: dsarno Date: Fri, 5 Sep 2025 19:36:58 -0700 Subject: [PATCH 15/28] fix workflow YAML parse --- .../prompts/nl-unity-suite-full-additive.md | 9 + .github/workflows/claude-nl-suite.yml | 164 +++++++++++++++--- 2 files changed, 149 insertions(+), 24 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index eabeebac..7b5cdd2d 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -7,6 +7,15 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un --- +## Result emission (STRICT) +- For each test NL-0..NL-4 and T-A..T-J, write ONE XML file at: reports/<TESTID>_results.xml +- The file must contain a SINGLE root element: `<testcase>...</testcase>` +- `<system-out>` contains evidence; include any key logs. +- On failure or partial execution, still emit the fragment with a `<failure>` node explaining why. +- TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4, T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J. Use EXACT casing and dash. 
+ +--- + ## Mission 1) Pick target file (prefer): - `unity://path/Assets/Scripts/LongUnityScriptClaudeTest.cs` diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 46b42ae2..c4d09fca 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -218,39 +218,58 @@ jobs: -executeMethod MCPForUnity.Editor.MCPForUnityBridge.StartAutoConnect # ---------- Wait for Unity bridge ---------- - - name: Wait for Unity bridge (real readiness & fast fail) + - name: Wait for Unity bridge (robust) shell: bash run: | - set -eu - timeout 900s bash <<'BASH' set -euo pipefail - ok_pat='(MCP(For)?Unity|AutoConnect|Bridge).*(listening|ready|started|port|bound)' - # Only license-fatal signals should abort the wait: - license_err='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|error|denied)|License (activation|return|renewal).*(failed|error|expired|denied)' - # If container already exited, fail fast with logs + deadline=$((SECONDS+900)) # 15 min max + fatal_after=$((SECONDS+120)) # give licensing 2 min to settle + + # Fail fast only if container actually died st="$(docker inspect -f '{{.State.Status}} {{.State.ExitCode}}' unity-mcp 2>/dev/null || true)" - case "$st" in - exited*|dead*) - docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' - exit 1;; - esac - while :; do - l="$(docker logs unity-mcp 2>&1 || true)" - if echo "$l" | grep -qiE "$ok_pat"; then exit 0; fi - # Alternate readiness: parse unity_port from status JSON and verify host TCP connect - port="$(docker exec unity-mcp bash -lc 'shopt -s nullglob; for f in /root/.unity-mcp/unity-mcp-status-*.json; do grep -ho "\"unity_port\"\\s*:\\s*[0-9]\\+" "$f"; done | sed -E "s/.*: *([0-9]+).*/\\1/" | head -n1' 2>/dev/null || true)" - if [[ -n "$port" ]] && timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$port"; then + 
case "$st" in exited*|dead*) docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig'; exit 1;; esac + + # Patterns + ok_pat='(Bridge|MCP(For)?Unity|AutoConnect).*(listening|ready|started|port|bound)' + # Only truly fatal signals; allow transient "Licensing::..." chatter + license_fatal='No valid Unity|License is not active|cannot load ULF|Signature element not found|Token not found|0 entitlement|Entitlement.*(failed|denied)|License (activation|return|renewal).*(failed|expired|denied)' + + while [ $SECONDS -lt $deadline ]; do + logs="$(docker logs unity-mcp 2>&1 || true)" + + # 1) Primary: status JSON exposes TCP port + port="$(docker exec unity-mcp bash -lc 'shopt -s nullglob; for f in /root/.unity-mcp/unity-mcp-status-*.json; do grep -ho "\"unity_port\"[[:space:]]*:[[:space:]]*[0-9]\+" "$f"; done | sed -E "s/.*: *([0-9]+).*/\1/" | head -n1' 2>/dev/null || true)" + if [[ -n "${port:-}" ]] && timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$port"; then + echo "Bridge ready on port $port" exit 0 fi - if echo "$l" | grep -qiE "$license_err"; then - echo "License failure matched by wait-gate:" >&2 - echo "$l" | tail -n 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' >&2 + + # 2) Secondary: log markers + if echo "$logs" | grep -qiE "$ok_pat"; then + echo "Bridge ready (log markers)" + exit 0 + fi + + # Only treat license failures as fatal *after* warm-up + if [ $SECONDS -ge $fatal_after ] && echo "$logs" | grep -qiE "$license_fatal"; then + echo "::error::Fatal licensing signal detected after warm-up" + echo "$logs" | tail -n 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' + exit 1 + fi + + # If the container dies mid-wait, bail + st="$(docker inspect -f '{{.State.Status}}' unity-mcp 2>/dev/null || true)" + if [[ "$st" != "running" ]]; then + echo "::error::Unity container exited during wait"; docker logs unity-mcp --tail 200 | sed -E 
's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' exit 1 fi + sleep 2 done - BASH - docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' || true + + echo "::error::Bridge not ready before deadline" + docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' + exit 1 - name: Return Pro license (if used) if: always() && steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true' @@ -375,7 +394,104 @@ jobs: model: claude-3-7-sonnet-latest timeout_minutes: "30" anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - + + - name: Canonicalize testcase names (NL/T prefixes) + if: always() + shell: bash + run: | + python3 - <<'PY' + from pathlib import Path + import xml.etree.ElementTree as ET, re + + RULES = [ + ("NL-0", r"\b(NL-0|Baseline|State\s*Capture)\b"), + ("NL-1", r"\b(NL-1|Core\s*Method)\b"), + ("NL-2", r"\b(NL-2|Anchor|Build\s*marker)\b"), + ("NL-3", r"\b(NL-3|End[-\s]*of[-\s]*Class|Tail\s*test)\b"), + ("NL-4", r"\b(NL-4|Console|Unity\s*console)\b"), + ("T-A", r"\b(T-?A|Temporary\s*Helper)\b"), + ("T-B", r"\b(T-?B|Method\s*Body\s*Interior)\b"), + ("T-C", r"\b(T-?C|Different\s*Method\s*Interior|ApplyBlend)\b"), + ("T-D", r"\b(T-?D|End[-\s]*of[-\s]*Class\s*Helper|TestHelper)\b"), + ("T-E", r"\b(T-?E|Method\s*Evolution|Counter|IncrementCounter)\b"), + ("T-F", r"\b(T-?F|Atomic\s*Multi[-\s]*Edit)\b"), + ("T-G", r"\b(T-?G|Path\s*Normalization)\b"), + ("T-H", r"\b(T-?H|Validation\s*on\s*Modified)\b"), + ("T-I", r"\b(T-?I|Failure\s*Surface)\b"), + ("T-J", r"\b(T-?J|Idempotenc(y|e))\b"), + ] + + def canon_name(name: str) -> str: + n = name or "" + for tid, pat in RULES: + if re.search(pat, n, flags=re.I): + suffix = re.sub(rf"^\s*{re.escape(tid)}\s*[:.\-–—]?\s*", "", n, flags=re.I) + return f"{tid}" + (f": {suffix}" if suffix.strip() else "") + return n + + for frag in sorted(Path("reports").glob("*_results.xml")): + try: + tree = 
ET.parse(frag); root = tree.getroot() + except Exception: + continue + if root.tag != "testcase": + continue + old = root.get("name") or "" + new = canon_name(old) + if new != old and new: + root.set("name", new) + tree.write(frag, encoding="utf-8", xml_declaration=False) + print(f'canon: {frag.name}: "{old}" -> "{new}"') + PY + + - name: Backfill missing NL/T tests (fail placeholders) + if: always() + shell: bash + run: | + python3 - <<'PY' + from pathlib import Path + import xml.etree.ElementTree as ET + + DESIRED = ["NL-0","NL-1","NL-2","NL-3","NL-4","T-A","T-B","T-C","T-D","T-E","T-F","T-G","T-H","T-I","T-J"] + seen = set() + for p in Path("reports").glob("*_results.xml"): + try: + r = ET.parse(p).getroot() + except Exception: + continue + if r.tag == "testcase": + name = (r.get("name") or "").strip() + for d in DESIRED: + if name.startswith(d): + seen.add(d) + break + + Path("reports").mkdir(parents=True, exist_ok=True) + for d in DESIRED: + if d in seen: + continue + frag = Path(f"reports/{d}_results.xml") + tc = ET.Element("testcase", {"classname":"UnityMCP.NL-T", "name": d}) + fail = ET.SubElement(tc, "failure", {"message":"not produced"}) + fail.text = "The agent did not emit a fragment for this test." 
+ ET.ElementTree(tc).write(frag, encoding="utf-8", xml_declaration=False) + print(f"backfill: {d}") + PY + + - name: "Debug: list testcase names" + if: always() + run: | + python3 - <<'PY' + from pathlib import Path, xml.etree.ElementTree as ET + for p in sorted(Path('reports').glob('*_results.xml')): + try: + r = ET.parse(p).getroot() + if r.tag == 'testcase': + print(f"{p.name}: {(r.get('name') or '').strip()}") + except Exception: + pass + PY + # ---------- Merge testcase fragments into JUnit ---------- - name: Normalize/assemble JUnit in-place (single file) if: always() From 1b63bb6b54d80c84fa422e6873ba00f1118e9597 Mon Sep 17 00:00:00 2001 From: dsarno Date: Fri, 5 Sep 2025 20:17:04 -0700 Subject: [PATCH 16/28] Normalize NL/T JUnit names and robust summary --- .../prompts/nl-unity-suite-full-additive.md | 5 +- .github/workflows/claude-nl-suite.yml | 170 ++++++++++++------ 2 files changed, 118 insertions(+), 57 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index 7b5cdd2d..a4f5dded 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -9,7 +9,8 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un ## Result emission (STRICT) - For each test NL-0..NL-4 and T-A..T-J, write ONE XML file at: reports/<TESTID>_results.xml -- The file must contain a SINGLE root element: `<testcase>...</testcase>` +- The file must contain a SINGLE root element. +- When writing a fragment, set ``. - `<system-out>` contains evidence; include any key logs. - On failure or partial execution, still emit the fragment with a `<failure>` node explaining why. - TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4, T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J. Use EXACT casing and dash. 
@@ -21,7 +22,7 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un - `unity://path/Assets/Scripts/LongUnityScriptClaudeTest.cs` 2) Execute **all** NL/T tests in order using minimal, precise edits that **build on each other**. 3) Validate each edit with `mcp__unity__validate_script(level:"standard")`. -4) **Report**: write one `<testcase>` XML fragment per test to `reports/<TESTID>_results.xml`. Do **not** read or edit `$JUNIT_OUT`. +4) **Report**: write one `<testcase>` XML fragment per test to `reports/<TESTID>_results.xml`. Do **not** read or edit `$JUNIT_OUT`. Do not create directories; assume `reports/` exists and write fragments directly. 5) **NO RESTORATION** - tests build additively on previous state. --- diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index c4d09fca..d2403cee 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -501,44 +501,84 @@ jobs: from pathlib import Path import xml.etree.ElementTree as ET import re, os - def localname(tag: str) -> str: return tag.rsplit('}', 1)[-1] if '}' in tag else tag + + def localname(tag: str) -> str: + return tag.rsplit('}', 1)[-1] if '}' in tag else tag + src = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml')) - if not src.exists(): raise SystemExit(0) - tree = ET.parse(src); root = tree.getroot() + if not src.exists(): + raise SystemExit(0) + + tree = ET.parse(src) + root = tree.getroot() suite = root.find('./*') if localname(root.tag) == 'testsuites' else root - if suite is None: raise SystemExit(0) + if suite is None: + raise SystemExit(0) + + def id_from_filename(p: Path): + n = p.name + m = re.match(r'NL(\d+)_results\.xml$', n, re.I) + if m: + return f"NL-{int(m.group(1))}" + m = re.match(r'T([A-J])_results\.xml$', n, re.I) + if m: + return f"T-{m.group(1).upper()}" + return None + + def id_from_system_out(tc): + so = tc.find('system-out') + if so is not None and so.text: + m = 
re.search(r'\b(NL-\d+|T-[A-Z])\b', so.text) + if m: + return m.group(1) + return None + fragments = sorted(Path('reports').glob('*_results.xml')) added = 0 + renamed = 0 + for frag in fragments: + tcs = [] try: froot = ET.parse(frag).getroot() if localname(froot.tag) == 'testcase': - suite.append(froot); added += 1 + tcs = [froot] else: - for tc in froot.findall('.//testcase'): - suite.append(tc); added += 1 + tcs = list(froot.findall('.//testcase')) except Exception: txt = Path(frag).read_text(encoding='utf-8', errors='replace') - for m in re.findall(r'', txt, flags=re.DOTALL): - try: suite.append(ET.fromstring(m)); added += 1 - except Exception: pass + for m in re.findall(r'', txt, flags=re.DOTALL): + try: + tcs.append(ET.fromstring(m)) + except Exception: + pass + + test_id = id_from_filename(frag) + + for tc in tcs: + current_name = tc.get('name') or '' + tid = test_id or id_from_system_out(tc) + if tid and not re.match(r'^\s*(NL-\d+|T-[A-Z])\b', current_name): + title = current_name.strip() + new_name = f'{tid} — {title}' if title else tid + tc.set('name', new_name) + renamed += 1 + suite.append(tc) + added += 1 + if added: # Drop bootstrap placeholder and recompute counts - removed_bootstrap = 0 for tc in list(suite.findall('.//testcase')): - name = (tc.get('name') or '') - if name == 'NL-Suite.Bootstrap': + if (tc.get('name') or '') == 'NL-Suite.Bootstrap': suite.remove(tc) - removed_bootstrap += 1 testcases = suite.findall('.//testcase') - tests_cnt = len(testcases) failures_cnt = sum(1 for tc in testcases if (tc.find('failure') is not None or tc.find('error') is not None)) - suite.set('tests', str(tests_cnt)) + suite.set('tests', str(len(testcases))) suite.set('failures', str(failures_cnt)) - suite.set('errors', str(0)) - suite.set('skipped', str(0)) + suite.set('errors', '0') + suite.set('skipped', '0') tree.write(src, encoding='utf-8', xml_declaration=True) - print(f"Added {added} testcase fragments; removed bootstrap={removed_bootstrap}; 
tests={tests_cnt}; failures={failures_cnt}") + print(f"Appended {added} testcase(s); renamed {renamed} to canonical NL/T names.") PY # ---------- Markdown summary from JUnit ---------- @@ -549,14 +589,13 @@ jobs: python3 - <<'PY' import xml.etree.ElementTree as ET from pathlib import Path - import os, html + import os, html, re def localname(tag: str) -> str: return tag.rsplit('}', 1)[-1] if '}' in tag else tag src = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml')) md_out = Path(os.environ.get('MD_OUT', 'reports/junit-nl-suite.md')) - # Ensure destination directory exists even if earlier prep steps were skipped md_out.parent.mkdir(parents=True, exist_ok=True) if not src.exists(): @@ -568,18 +607,32 @@ jobs: suite = root.find('./*') if localname(root.tag) == 'testsuites' else root cases = [] if suite is None else list(suite.findall('.//testcase')) - total = len(cases) - failures = sum(1 for tc in cases if (tc.find('failure') is not None or tc.find('error') is not None)) - passed = total - failures + def id_from_case(tc): + n = (tc.get('name') or '') + m = re.match(r'\s*(NL-\d+|T-[A-Z])\b', n) + if m: + return m.group(1) + so = tc.find('system-out') + if so is not None and so.text: + m = re.search(r'\b(NL-\d+|T-[A-Z])\b', so.text) + if m: + return m.group(1) + return None + + id_status = {} + name_map = {} + for tc in cases: + tid = id_from_case(tc) + ok = (tc.find('failure') is None and tc.find('error') is None) + if tid and tid not in id_status: + id_status[tid] = ok + name_map[tid] = (tc.get('name') or tid) desired = ['NL-0','NL-1','NL-2','NL-3','NL-4','T-A','T-B','T-C','T-D','T-E','T-F','T-G','T-H','T-I','T-J'] - name_to_case = {(tc.get('name') or ''): tc for tc in cases} - def status_for(prefix: str): - for name, tc in name_to_case.items(): - if name.startswith(prefix): - return not ((tc.find('failure') is not None) or (tc.find('error') is not None)) - return None + total = len(cases) + failures = sum(1 for tc in cases if (tc.find('failure') is 
not None or tc.find('error') is not None)) + passed = total - failures lines = [] lines += [ @@ -590,52 +643,59 @@ jobs: '## Test Checklist' ] for p in desired: - st = status_for(p) + st = id_status.get(p, None) lines.append(f"- [x] {p}" if st is True else (f"- [ ] {p} (fail)" if st is False else f"- [ ] {p} (not run)")) lines.append('') - # Rich per-test system-out details lines.append('## Test Details') def order_key(n: str): - try: - if n.startswith('NL-') and n[3].isdigit(): - return (0, int(n.split('.')[0].split('-')[1])) - except Exception: - pass - if n.startswith('T-') and len(n) > 2 and n[2].isalpha(): + if n.startswith('NL-'): + try: + return (0, int(n.split('-')[1])) + except: + return (0, 999) + if n.startswith('T-') and len(n) > 2: return (1, ord(n[2])) return (2, n) MAX_CHARS = 2000 - for name in sorted(name_to_case.keys(), key=order_key): - tc = name_to_case[name] - status_badge = "PASS" if (tc.find('failure') is None and tc.find('error') is None) else "FAIL" - lines.append(f"### {name} — {status_badge}") + seen = set() + for tid in sorted(id_status.keys(), key=order_key): + seen.add(tid) + tc = next((c for c in cases if (id_from_case(c) == tid)), None) + if not tc: + continue + title = name_map.get(tid, tid) + status_badge = "PASS" if id_status[tid] else "FAIL" + lines.append(f"### {title} — {status_badge}") so = tc.find('system-out') - text = '' if so is None or so.text is None else so.text.replace('\r\n','\n') - # Unescape XML entities so code reads naturally (e.g., => instead of =>) - if text: - text = html.unescape(text) + text = '' if so is None or so.text is None else html.unescape(so.text.replace('\r\n','\n')) if text.strip(): t = text.strip() if len(t) > MAX_CHARS: t = t[:MAX_CHARS] + "\n…(truncated)" - # Use a safer fence if content contains triple backticks - fence = '```' - if '```' in t: - fence = '````' - lines.append(fence) - lines.append(t) - lines.append(fence) + fence = '```' if '```' not in t else '````' + lines += [fence, t, 
fence] else: lines.append('(no system-out)') node = tc.find('failure') or tc.find('error') if node is not None: msg = (node.get('message') or '').strip() body = (node.text or '').strip() - if msg: lines.append(f"- Message: {msg}") - if body: lines.append(f"- Detail: {body.splitlines()[0][:500]}") + if msg: + lines.append(f"- Message: {msg}") + if body: + lines.append(f"- Detail: {body.splitlines()[0][:500]}") + lines.append('') + + for tc in cases: + if id_from_case(tc) in seen: + continue + title = tc.get('name') or '(unnamed)' + status_badge = "PASS" if (tc.find('failure') is None and tc.find('error') is None) else "FAIL" + lines.append(f"### {title} — {status_badge}") + lines.append('(unmapped test id)') lines.append('') md_out.write_text('\n'.join(lines), encoding='utf-8') From 0e2c3246069815c7f1d071c4aaca1c10cbbc9170 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 20:51:40 -0700 Subject: [PATCH 17/28] Fix Python import syntax in workflow debug step --- .github/workflows/claude-nl-suite.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index d2403cee..0bb86a2f 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -482,7 +482,8 @@ jobs: if: always() run: | python3 - <<'PY' - from pathlib import Path, xml.etree.ElementTree as ET + from pathlib import Path + import xml.etree.ElementTree as ET for p in sorted(Path('reports').glob('*_results.xml')): try: r = ET.parse(p).getroot() From 3bc7bf5228c152fe679121a995e22dda18f0eb88 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 20:56:15 -0700 Subject: [PATCH 18/28] Improve prompt clarity for XML test fragment format - Add detailed XML format requirements with exact specifications - Emphasize NO prologue, epilogue, code fences, or extra characters - Add specific instructions for T-D and T-J tests to write fragments immediately - Include exact XML 
template and TESTID requirements - Should fix T-D and T-J test failures in CI by ensuring proper fragment format --- .../prompts/nl-unity-suite-full-additive.md | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index a4f5dded..21b05c4b 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -7,22 +7,29 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un --- -## Result emission (STRICT) -- For each test NL-0..NL-4 and T-A..T-J, write ONE XML file at: reports/_results.xml -- The file must contain a SINGLE root element. -- When writing a fragment, set ``. -- `` contains evidence; include any key logs. -- On failure or partial execution, still emit the fragment with a `` node explaining why. -- TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4, T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J. Use EXACT casing and dash. - ---- - ## Mission 1) Pick target file (prefer): - `unity://path/Assets/Scripts/LongUnityScriptClaudeTest.cs` 2) Execute **all** NL/T tests in order using minimal, precise edits that **build on each other**. 3) Validate each edit with `mcp__unity__validate_script(level:"standard")`. -4) **Report**: write one `` XML fragment per test to `reports/_results.xml`. Do **not** read or edit `$JUNIT_OUT`. Do not create directories; assume `reports/` exists and write fragments directly. +4) **Report**: write one `` XML fragment per test to `reports/_results.xml`. Do **not** read or edit `$JUNIT_OUT`. 
+ +**CRITICAL XML FORMAT REQUIREMENTS:** +- Each file must contain EXACTLY one `` root element +- NO prologue, epilogue, code fences, or extra characters +- NO markdown formatting or explanations outside the XML +- Use this exact format: + +```xml + + + +``` + +- If test fails, include: `` +- TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4, T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J 5) **NO RESTORATION** - tests build additively on previous state. --- @@ -145,6 +152,8 @@ CI provides: **Actions**: - Use smart anchor matching to find current class-ending brace (after NL-3 tail comments) - Insert permanent helper before class brace: `private void TestHelper() { /* placeholder */ }` +- Validate with `mcp__unity__validate_script(level:"standard")` +- **IMMEDIATELY** write clean XML fragment to `reports/T-D_results.xml` (no extra text) - **Expected final state**: State E + TestHelper() method before class end ### T-E. Method Evolution Lifecycle (Additive State G) @@ -199,6 +208,7 @@ CI provides: - **Remove (structured)**: `{"op":"regex_replace","pattern":"(?m)^\\s*// idempotency test marker\\r?\\n?","text":""}` - **Remove again** (same `regex_replace`) → expect `no_op: true`. 
- `mcp__unity__validate_script(level:"standard")` +- **IMMEDIATELY** write clean XML fragment to `reports/T-J_results.xml` with evidence of both `no_op: true` outcomes - **Expected final state**: State H + verified idempotent behavior --- From fd626eabf1fa2deefd133a29612f31766c6f307c Mon Sep 17 00:00:00 2001 From: David Sarno Date: Fri, 5 Sep 2025 21:01:37 -0700 Subject: [PATCH 19/28] Fix problematic regex substitution in test name canonicalization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace unsafe regex substitution that could create malformed names - New approach: preserve correctly formatted names, extract titles safely - Prevents edge cases where double processing could corrupt test names - Uses proper em dash (—) separator consistently - More robust handling of various input formats --- .github/workflows/claude-nl-suite.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 0bb86a2f..8d519070 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -425,8 +425,16 @@ jobs: n = name or "" for tid, pat in RULES: if re.search(pat, n, flags=re.I): - suffix = re.sub(rf"^\s*{re.escape(tid)}\s*[:.\-–—]?\s*", "", n, flags=re.I) - return f"{tid}" + (f": {suffix}" if suffix.strip() else "") + # If it already starts with the correct format, leave it alone + if re.match(rf'^\s*{re.escape(tid)}\s*[—–-]', n, flags=re.I): + return n.strip() + # If it has a different separator, extract title and reformat + title_match = re.search(rf'{re.escape(tid)}\s*[:.\-–—]\s*(.+)', n, flags=re.I) + if title_match: + title = title_match.group(1).strip() + return f"{tid} — {title}" + # Otherwise, just return the canonical ID + return tid return n for frag in sorted(Path("reports").glob("*_results.xml")): From 181f3ad24e13465372af9e58181398de180614f9 Mon Sep 17 00:00:00 2001 From: David 
Sarno Date: Fri, 5 Sep 2025 21:26:09 -0700 Subject: [PATCH 20/28] =?UTF-8?q?CI:=20NL/T=20hardening=20=E2=80=94=20enfo?= =?UTF-8?q?rce=20filename-derived=20IDs,=20robust=20backfill,=20single-tes?= =?UTF-8?q?tcase=20guard;=20tighten=20prompt=20emissions;=20disallow=20Bas?= =?UTF-8?q?h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../prompts/nl-unity-suite-full-additive.md | 16 ++++-- .github/workflows/claude-nl-suite.yml | 52 +++++++++++++++++-- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index 21b05c4b..1db10097 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -31,6 +31,7 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un - If test fails, include: `<failure message="reason"/>` - TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4, T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J 5) **NO RESTORATION** - tests build additively on previous state. +6) **STRICT FRAGMENT EMISSION** - After completing T-D and T-J, immediately emit a clean XML file under `reports/<TESTID>_results.xml` with exactly one `<testcase>` whose `name` begins with the exact test id. No prologue/epilogue or fences. --- @@ -153,7 +154,7 @@ CI provides: - Use smart anchor matching to find current class-ending brace (after NL-3 tail comments) - Insert permanent helper before class brace: `private void TestHelper() { /* placeholder */ }` - Validate with `mcp__unity__validate_script(level:"standard")` -- **IMMEDIATELY** write clean XML fragment to `reports/T-D_results.xml` (no extra text) +- **IMMEDIATELY** write clean XML fragment to `reports/T-D_results.xml` (no extra text). The `<testcase name>` must start with `T-D`. Include brief evidence and the latest SHA in `system-out`. - **Expected final state**: State E + TestHelper() method before class end ### T-E. 
Method Evolution Lifecycle (Additive State G) @@ -174,6 +175,7 @@ CI provides: 3. Add final class comment: `// end of test modifications` - All edits computed from same file snapshot, applied atomically - **Expected final state**: State G + three coordinated comments +- After applying the atomic edits, run `validate_script(level:"standard")` and emit a clean fragment to `reports/T-F_results.xml` with a short summary and the latest SHA. ### T-G. Path Normalization Test (No State Change) **Goal**: Verify URI forms work equivalently on modified file @@ -183,6 +185,7 @@ CI provides: - Second should return `stale_file`, retry with updated SHA - Verify both URI forms target same file - **Expected final state**: State H (no content change, just path testing) +- Emit `reports/T-G_results.xml` showing evidence of stale SHA handling and final SHA. ### T-H. Validation on Modified File (No State Change) **Goal**: Ensure validation works correctly on heavily modified file @@ -190,6 +193,7 @@ CI provides: - Run `validate_script(level:"standard")` on current state - Verify no structural errors despite extensive modifications - **Expected final state**: State H (validation only, no edits) +- Emit `reports/T-H_results.xml` confirming validation OK and including the latest SHA. ### T-I. Failure Surface Testing (No State Change) **Goal**: Test error handling on real modified file @@ -198,6 +202,7 @@ CI provides: - Attempt edit with stale SHA (should fail cleanly) - Verify error responses are informative - **Expected final state**: State H (failed operations don't modify file) +- Emit `reports/T-I_results.xml` capturing error evidence and final SHA; file must contain one `<testcase>`. ### T-J. 
Idempotency on Modified File (Additive State I) **Goal**: Verify operations behave predictably when repeated @@ -208,7 +213,7 @@ CI provides: - **Remove (structured)**: `{"op":"regex_replace","pattern":"(?m)^\\s*// idempotency test marker\\r?\\n?","text":""}` - **Remove again** (same `regex_replace`) → expect `no_op: true`. - `mcp__unity__validate_script(level:"standard")` -- **IMMEDIATELY** write clean XML fragment to `reports/T-J_results.xml` with evidence of both `no_op: true` outcomes +- **IMMEDIATELY** write clean XML fragment to `reports/T-J_results.xml` with evidence of both `no_op: true` outcomes. The `<testcase name>` must start with `T-J` and include the latest SHA. - **Expected final state**: State H + verified idempotent behavior --- @@ -263,4 +268,9 @@ find_in_file(pattern: "public bool HasTarget\\(\\)") 5. **Better Failure Analysis**: Failures don't cascade - each test adapts to current reality 6. **State Evolution Testing**: Validates SDK handles cumulative file modifications correctly -This additive approach produces a more realistic and maintainable test suite that better represents actual SDK usage patterns. \ No newline at end of file +This additive approach produces a more realistic and maintainable test suite that better represents actual SDK usage patterns. + +--- + +BAN ON EXTRA TOOLS AND DIRS +- Do not use any tools outside `AllowedTools`. Do not create directories; assume `reports/` exists. 
\ No newline at end of file diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 8d519070..0e73216c 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -380,7 +380,6 @@ jobs: mcp_config: .claude/mcp.json allowed_tools: >- Write, - Bash(scripts/nlt-revert.sh:*), mcp__unity__manage_editor, mcp__unity__list_resources, mcp__unity__read_resource, @@ -390,7 +389,7 @@ jobs: mcp__unity__find_in_file, mcp__unity__read_console, mcp__unity__get_sha - disallowed_tools: TodoWrite,Task + disallowed_tools: TodoWrite,Task,Bash model: claude-3-7-sonnet-latest timeout_minutes: "30" anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} @@ -437,6 +436,16 @@ jobs: return tid return n + def id_from_filename(p: Path): + n = p.name + m = re.match(r'NL(\d+)_results\.xml$', n, re.I) + if m: + return f"NL-{int(m.group(1))}" + m = re.match(r'T([A-J])_results\.xml$', n, re.I) + if m: + return f"T-{m.group(1).upper()}" + return None + for frag in sorted(Path("reports").glob("*_results.xml")): try: tree = ET.parse(frag); root = tree.getroot() @@ -444,8 +453,15 @@ jobs: continue if root.tag != "testcase": continue + file_id = id_from_filename(frag) old = root.get("name") or "" - new = canon_name(old) + # Prefer filename-derived ID; if name doesn't start with it, override + if file_id and not re.match(rf'^\s*{re.escape(file_id)}\b', old): + # Strip any existing leading ID + separator to form a clean title + title_match = re.sub(r'^\s*(NL-\d+|T-[A-Z])\s*[—–:\-]\s*', '', old).strip() + new = f"{file_id} — {title_match}" if title_match else file_id + else: + new = canon_name(old) if new != old and new: root.set("name", new) tree.write(frag, encoding="utf-8", xml_declaration=False) @@ -459,14 +475,30 @@ jobs: python3 - <<'PY' from pathlib import Path import xml.etree.ElementTree as ET + import re DESIRED = ["NL-0","NL-1","NL-2","NL-3","NL-4","T-A","T-B","T-C","T-D","T-E","T-F","T-G","T-H","T-I","T-J"] seen = set() 
+ def id_from_filename(p: Path): + n = p.name + m = re.match(r'NL(\d+)_results\.xml$', n, re.I) + if m: + return f"NL-{int(m.group(1))}" + m = re.match(r'T([A-J])_results\.xml$', n, re.I) + if m: + return f"T-{m.group(1).upper()}" + return None + for p in Path("reports").glob("*_results.xml"): try: r = ET.parse(p).getroot() except Exception: continue + # Count by filename id primarily; fall back to testcase name if needed + fid = id_from_filename(p) + if fid in DESIRED: + seen.add(fid) + continue if r.tag == "testcase": name = (r.get("name") or "").strip() for d in DESIRED: @@ -556,21 +588,33 @@ jobs: tcs = list(froot.findall('.//testcase')) except Exception: txt = Path(frag).read_text(encoding='utf-8', errors='replace') - for m in re.findall(r'', txt, flags=re.DOTALL): + # Extract all testcase nodes from raw text + nodes = re.findall(r'', txt, flags=re.DOTALL) + for m in nodes: try: tcs.append(ET.fromstring(m)) except Exception: pass + # Guard: keep only the first testcase from each fragment + if len(tcs) > 1: + tcs = tcs[:1] + test_id = id_from_filename(frag) for tc in tcs: current_name = tc.get('name') or '' tid = test_id or id_from_system_out(tc) + # Enforce filename-derived ID as prefix; repair names if needed if tid and not re.match(r'^\s*(NL-\d+|T-[A-Z])\b', current_name): title = current_name.strip() new_name = f'{tid} — {title}' if title else tid tc.set('name', new_name) + elif tid and not re.match(rf'^\s*{re.escape(tid)}\b', current_name): + # Replace any wrong leading ID with the correct one + title = re.sub(r'^\s*(NL-\d+|T-[A-Z])\s*[—–:\-]\s*', '', current_name).strip() + new_name = f'{tid} — {title}' if title else tid + tc.set('name', new_name) renamed += 1 suite.append(tc) added += 1 From 3e49259543ff56a7a1e64a5d7ab98c3141546c02 Mon Sep 17 00:00:00 2001 From: dsarno Date: Fri, 5 Sep 2025 22:46:07 -0700 Subject: [PATCH 21/28] fix: keep file ID when canonicalizing test names --- .github/workflows/claude-nl-suite.yml | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 0e73216c..c919addd 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -406,7 +406,7 @@ jobs: ("NL-0", r"\b(NL-0|Baseline|State\s*Capture)\b"), ("NL-1", r"\b(NL-1|Core\s*Method)\b"), ("NL-2", r"\b(NL-2|Anchor|Build\s*marker)\b"), - ("NL-3", r"\b(NL-3|End[-\s]*of[-\s]*Class|Tail\s*test)\b"), + ("NL-3", r"\b(NL-3|End[-\s]*of[-\s]*Class\s*Content|Tail\s*test\s*[ABC])\b"), ("NL-4", r"\b(NL-4|Console|Unity\s*console)\b"), ("T-A", r"\b(T-?A|Temporary\s*Helper)\b"), ("T-B", r"\b(T-?B|Method\s*Body\s*Interior)\b"), @@ -456,10 +456,10 @@ jobs: file_id = id_from_filename(frag) old = root.get("name") or "" # Prefer filename-derived ID; if name doesn't start with it, override - if file_id and not re.match(rf'^\s*{re.escape(file_id)}\b', old): - # Strip any existing leading ID + separator to form a clean title - title_match = re.sub(r'^\s*(NL-\d+|T-[A-Z])\s*[—–:\-]\s*', '', old).strip() - new = f"{file_id} — {title_match}" if title_match else file_id + if file_id: + # Respect file's ID (prevents T-D being renamed to NL-3 by loose patterns) + title = re.sub(r'^\s*(NL-\d+|T-[A-Z])\s*[—–:\-]\s*', '', old).strip() + new = f"{file_id} — {title}" if title else file_id else: new = canon_name(old) if new != old and new: From d0937e8a015a1fc3271064164af46aa1e1b6b4cf Mon Sep 17 00:00:00 2001 From: David Sarno Date: Sat, 6 Sep 2025 09:42:23 -0700 Subject: [PATCH 22/28] CI: move Unity Pro license return to teardown after stopping Unity; keep placeholder at original site --- .github/workflows/claude-nl-suite.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index c919addd..35d5cac9 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -271,14 +271,7 @@ jobs: 
docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' exit 1 - - name: Return Pro license (if used) - if: always() && steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true' - uses: game-ci/unity-return-license@v2 - continue-on-error: true - env: - UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} - UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} - UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} + # (moved) — return license after Unity is stopped # ---------- MCP client config ---------- - name: Write MCP config (.claude/mcp.json) @@ -853,4 +846,13 @@ jobs: run: | docker logs --tail 400 unity-mcp | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true docker rm -f unity-mcp || true + + - name: Return Pro license (if used) + if: always() && steps.lic.outputs.use_ebl == 'true' && steps.lic.outputs.has_serial == 'true' + uses: game-ci/unity-return-license@v2 + continue-on-error: true + env: + UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} + UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} + UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} \ No newline at end of file From c86c68315ee2e54deb0faec16cb0650e045153e4 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Sat, 6 Sep 2025 09:45:03 -0700 Subject: [PATCH 23/28] CI: remove revert helper & baseline snapshot; stop creating scripts dir; prompt: standardize T-B validation to level=standard --- .../prompts/nl-unity-suite-full-additive.md | 2 +- .github/workflows/claude-nl-suite.yml | 48 +------------------ 2 files changed, 3 insertions(+), 47 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index 1db10097..9d98ecd7 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -136,7 +136,7 @@ CI provides: **Actions**: - Use `find_in_file` to locate current `HasTarget()` method (modified in NL-1) - Edit method body interior: 
change return statement to `return true; /* test modification */` -- Use `validate: "relaxed"` for interior-only edit +- Validate with `mcp__unity__validate_script(level:"standard")` for consistency - Verify edit succeeded and file remains balanced - **Expected final state**: State C + modified HasTarget() body diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 35d5cac9..b4c86d92 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -300,7 +300,7 @@ jobs: run: | set -eux rm -f reports/*.xml reports/*.md || true - mkdir -p reports reports/_snapshots scripts + mkdir -p reports reports/_snapshots - name: Create report skeletons run: | @@ -315,51 +315,7 @@ jobs: XML printf '# Unity NL/T Editing Suite Test Results\n\n' > "$MD_OUT" - - name: Write safe revert helper (scripts/nlt-revert.sh) - shell: bash - run: | - set -eux - cat > scripts/nlt-revert.sh <<'BASH' - #!/usr/bin/env bash - set -euo pipefail - sub="${1:-}"; target_rel="${2:-}"; snap="${3:-}" - WS="${GITHUB_WORKSPACE:-$PWD}" - ROOT="$WS/TestProjects/UnityMCPTests" - t_abs="$(realpath -m "$WS/$target_rel")" - s_abs="$(realpath -m "$WS/$snap")" - if [[ "$t_abs" != "$ROOT/Assets/"* ]]; then - echo "refuse: target outside allowed scope: $t_abs" >&2; exit 2 - fi - mkdir -p "$(dirname "$s_abs")" - case "$sub" in - snapshot) - cp -f "$t_abs" "$s_abs" - sha=$(sha256sum "$s_abs" | awk '{print $1}') - echo "snapshot_sha=$sha" - ;; - restore) - if [[ ! 
-f "$s_abs" ]]; then echo "snapshot missing: $s_abs" >&2; exit 3; fi - cp -f "$s_abs" "$t_abs" - touch "$t_abs" - sha=$(sha256sum "$t_abs" | awk '{print $1}') - echo "restored_sha=$sha" - ;; - *) - echo "usage: $0 snapshot|restore " >&2; exit 1 - ;; - esac - BASH - chmod +x scripts/nlt-revert.sh - - # ---------- Snapshot baseline (pre-agent) ---------- - - name: Snapshot baseline (pre-agent) - if: steps.detect.outputs.anthropic_ok == 'true' && steps.detect.outputs.unity_ok == 'true' - shell: bash - run: | - set -euo pipefail - TARGET="TestProjects/UnityMCPTests/Assets/Scripts/LongUnityScriptClaudeTest.cs" - SNAP="reports/_snapshots/LongUnityScriptClaudeTest.cs.baseline" - scripts/nlt-revert.sh snapshot "$TARGET" "$SNAP" + # (removed) Revert helper and baseline snapshot are no longer used # ---------- Run suite ---------- From e1d8ac57a73292ba7c0b74c144ae6f23a76e1153 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Sat, 6 Sep 2025 09:48:30 -0700 Subject: [PATCH 24/28] CI: remove mini workflow and obsolete NL prompts; redact email in all Unity log dumps --- .claude/prompts/nl-unity-claude-tests-mini.md | 45 --- .claude/prompts/nl-unity-suite-full.md | 234 ------------ .github/workflows/claude-nl-suite-mini.yml | 356 ------------------ .github/workflows/claude-nl-suite.yml | 10 +- 4 files changed, 5 insertions(+), 640 deletions(-) delete mode 100644 .claude/prompts/nl-unity-claude-tests-mini.md delete mode 100644 .claude/prompts/nl-unity-suite-full.md delete mode 100644 .github/workflows/claude-nl-suite-mini.yml diff --git a/.claude/prompts/nl-unity-claude-tests-mini.md b/.claude/prompts/nl-unity-claude-tests-mini.md deleted file mode 100644 index 35900b71..00000000 --- a/.claude/prompts/nl-unity-claude-tests-mini.md +++ /dev/null @@ -1,45 +0,0 @@ -# Unity NL Editing Suite — Natural Mode - -You are running inside CI for the **unity-mcp** repository. 
Your task is to demonstrate end‑to‑end **natural‑language code editing** on a representative Unity C# script using whatever capabilities and servers are already available in this session. Work autonomously. Do not ask the user for input. Do NOT spawn subagents, as they will not have access to the mcp server process on the top-level agent. - -## Mission -1) **Discover capabilities.** Quietly inspect the tools and any connected servers that are available to you at session start. If the server offers a primer or capabilities resource, read it before acting. -2) **Choose a target file.** Prefer `TestProjects/UnityMCPTests/Assets/Scripts/LongUnityScriptClaudeTest.cs` if it exists; otherwise choose a simple, safe C# script under `TestProjects/UnityMCPTests/Assets/`. -3) **Perform a small set of realistic edits** using minimal, precise changes (not full-file rewrites). Examples of small edits you may choose from (pick 3–6 total): - - Insert a new, small helper method (e.g., a logger or counter) in a sensible location. - - Add a short anchor comment near a key method (e.g., above `Update()`), then add or modify a few lines nearby. - - Append an end‑of‑class utility method (e.g., formatting or clamping helper). - - Make a safe, localized tweak to an existing method body (e.g., add a guard or a simple accumulator). - - Optionally include one idempotency/no‑op check (re‑apply an edit and confirm nothing breaks). -4) **Validate your edits.** Re‑read the modified regions and verify the changes exist, compile‑risk is low, and surrounding structure remains intact. -5) **Report results.** Produce both: - - A JUnit XML at `reports/junit-nl-suite.xml` containing a single suite named `UnityMCP.NL` with one test case per sub‑test you executed (mark pass/fail and include helpful failure text). - - A summary markdown at `reports/junit-nl-suite.md` that explains what you attempted, what succeeded/failed, and any follow‑ups you would try. 
-6) **Be gentle and reversible.** Prefer targeted, minimal edits; avoid wide refactors or non‑deterministic changes. - -## Assumptions & Hints (non‑prescriptive) -- A Unity‑oriented MCP server is expected to be connected. If a server‑provided **primer/capabilities** resource exists, read it first. If no primer is available, infer capabilities from your visible tools in the session. -- In CI/headless mode, when calling `mcp__unity__list_resources` or `mcp__unity__read_resource`, include: - - `ctx: {}` - - `project_root: "TestProjects/UnityMCPTests"` (the server will also accept the absolute path passed via env) - Example: `{ "ctx": {}, "under": "Assets/Scripts", "pattern": "*.cs", "project_root": "TestProjects/UnityMCPTests" }` -- If the preferred file isn’t present, locate a fallback C# file with simple, local methods you can edit safely. -- If a compile command is available in this environment, you may optionally trigger it; if not, rely on structural checks and localized validation. - -## Output Requirements (match NL suite conventions) -- JUnit XML at `$JUNIT_OUT` if set, otherwise `reports/junit-nl-suite.xml`. - - Single suite named `UnityMCP.NL`, one `` per sub‑test; include `` on errors. -- Markdown at `$MD_OUT` if set, otherwise `reports/junit-nl-suite.md`. - -Constraints (for fast publishing): -- Log allowed tools once as a single line: `AllowedTools: ...`. -- For every edit: Read → Write (with precondition hash) → Re‑read; on `{status:"stale_file"}` retry once after re‑read. -- Keep evidence to ±20–40 lines windows; cap unified diffs to 300 lines and note truncation. -- End `` with `VERDICT: PASS` or `VERDICT: FAIL`. - -## Guardrails -- No destructive operations. Keep changes minimal and well‑scoped. -- Don’t leak secrets or environment details beyond what’s needed in the reports. -- Work without user interaction; do not prompt for approval mid‑flow. 
- -> If capabilities discovery fails, still produce the two reports that clearly explain why you could not proceed and what evidence you gathered. diff --git a/.claude/prompts/nl-unity-suite-full.md b/.claude/prompts/nl-unity-suite-full.md deleted file mode 100644 index 1b46127a..00000000 --- a/.claude/prompts/nl-unity-suite-full.md +++ /dev/null @@ -1,234 +0,0 @@ -# Unity NL/T Editing Suite — CI Agent Contract - -You are running inside CI for the `unity-mcp` repo. Use only the tools allowed by the workflow. Work autonomously; do not prompt the user. Do NOT spawn subagents. - -**Print this once, verbatim, early in the run:** -AllowedTools: Write,Bash(printf:*),Bash(echo:*),Bash(scripts/nlt-revert.sh:*),mcp__unity__manage_editor,mcp__unity__list_resources,mcp__unity__read_resource,mcp__unity__apply_text_edits,mcp__unity__script_apply_edits,mcp__unity__validate_script,mcp__unity__find_in_file,mcp__unity__read_console,mcp__unity__get_sha - ---- - -## Mission -1) Pick target file (prefer): - - `unity://path/Assets/Scripts/LongUnityScriptClaudeTest.cs` -2) Execute **all** NL/T tests in order using minimal, precise edits. -3) Validate each edit with `mcp__unity__validate_script(level:"standard")`. -4) **Report**: write one `` XML fragment per test to `reports/_results.xml`. Do **not** read or edit `$JUNIT_OUT`. -5) **Restore** the file after each test using the OS‑level helper (fast), not a full‑file text write. - ---- - -## Environment & Paths (CI) -- Always pass: `project_root: "TestProjects/UnityMCPTests"` and `ctx: {}` on list/read/edit/validate. 
-- **Canonical URIs only**: - - Primary: `unity://path/Assets/...` (never embed `project_root` in the URI) - - Relative (when supported): `Assets/...` -- File paths for the helper script are workspace‑relative: - - `TestProjects/UnityMCPTests/Assets/...` - -CI provides: -- `$JUNIT_OUT=reports/junit-nl-suite.xml` (pre‑created; leave alone) -- `$MD_OUT=reports/junit-nl-suite.md` (synthesized from JUnit) -- Helper script: `scripts/nlt-revert.sh` (snapshot/restore) - ---- - -## Tool Mapping -- **Anchors/regex/structured**: `mcp__unity__script_apply_edits` - - Allowed ops: `anchor_insert`, `replace_range`, `regex_replace` (no overlapping ranges within a single call) -- **Precise ranges / atomic batch**: `mcp__unity__apply_text_edits` (non‑overlapping ranges) - - Multi‑span batches are computed from the same fresh read and sent atomically by default. - - Prefer `options.applyMode:"atomic"` when passing options for multiple spans; for single‑span, sequential is fine. -- **Hash-only**: `mcp__unity__get_sha` — returns `{sha256,lengthBytes,lastModifiedUtc}` without file body -- **Validation**: `mcp__unity__validate_script(level:"standard")` - - For edits, you may pass `options.validate`: - - `standard` (default): full‑file delimiter balance checks. - - `relaxed`: scoped checks for interior, non‑structural text edits; do not use for header/signature/brace‑touching changes. -- **Reporting**: `Write` small XML fragments to `reports/*_results.xml` -- **Editor state/flush**: `mcp__unity__manage_editor` (use sparingly; no project mutations) -- **Console readback**: `mcp__unity__read_console` (INFO capture only; do not assert in place of `validate_script`) -- **Snapshot/Restore**: `Bash(scripts/nlt-revert.sh:*)` - - For `script_apply_edits`: use `name` + workspace‑relative `path` only (e.g., `name="LongUnityScriptClaudeTest"`, `path="Assets/Scripts"`). Do not pass `unity://...` URIs as `path`. 
- - For `apply_text_edits` / `read_resource`: use the URI form only (e.g., `uri="unity://path/Assets/Scripts/LongUnityScriptClaudeTest.cs"`). Do not concatenate `Assets/` with a `unity://...` URI. - - Never call generic Bash like `mkdir`; the revert helper creates needed directories. Use only `scripts/nlt-revert.sh` for snapshot/restore. - - If you believe a directory is missing, you are mistaken: the workflow pre-creates it and the snapshot helper creates it if needed. Do not attempt any Bash other than scripts/nlt-revert.sh:*. - -### Structured edit ops (required usage) - -# Insert a helper RIGHT BEFORE the final class brace (NL‑3, T‑D) -1) Prefer `script_apply_edits` with a regex capture on the final closing brace: -```json -{"op":"regex_replace", - "pattern":"(?s)(\\r?\\n\\s*\\})\\s*$", - "replacement":"\\n // Tail test A\\n // Tail test B\\n // Tail test C\\1"} - -2) If the server returns `unsupported` (op not available) or `missing_field` (op‑specific), FALL BACK to - `apply_text_edits`: - - Find the last `}` in the file (class closing brace) by scanning from end. - - Insert the three comment lines immediately before that index with one non‑overlapping range. - -# Insert after GetCurrentTarget (T‑A/T‑E) -- Use `script_apply_edits` with: -```json -{"op":"anchor_insert","afterMethodName":"GetCurrentTarget","text":"private int __TempHelper(int a,int b)=>a+b;\\n"} -``` - -# Delete the temporary helper (T‑A/T‑E) -- Prefer structured delete: - - Use `script_apply_edits` with `{ "op":"delete_method", "className":"LongUnityScriptClaudeTest", "methodName":"PrintSeries" }` (or `__TempHelper` for T‑A). -- If structured delete is unavailable, fall back to `apply_text_edits` with a single `replace_range` spanning the exact method block (bounds computed from a fresh read); avoid whole‑file regex deletes. - -# T‑B (replace method body) -- Use `mcp__unity__apply_text_edits` with a single `replace_range` strictly inside the `HasTarget` braces. 
-- Compute start/end from a fresh `read_resource` at test start. Do not edit signature or header. -- On `{status:"stale_file"}` retry once with the server-provided hash; if absent, re-read once and retry. -- On `bad_request`: write the testcase with ``, restore, and continue to next test. -- On `missing_field`: FALL BACK per above; if the fallback also returns `unsupported` or `bad_request`, then fail as above. -> Don’t use `mcp__unity__create_script`. Avoid the header/`using` region entirely. - -Span formats for `apply_text_edits`: -- Prefer LSP ranges (0‑based): `{ "range": { "start": {"line": L, "character": C}, "end": {…} }, "newText": "…" }` -- Explicit fields are 1‑based: `{ "startLine": L1, "startCol": C1, "endLine": L2, "endCol": C2, "newText": "…" }` -- SDK preflights overlap after normalization; overlapping non‑zero spans → `{status:"overlap"}` with conflicts and no file mutation. -- Optional debug: pass `strict:true` to reject explicit 0‑based fields (else they are normalized and a warning is emitted). -- Apply mode guidance: router defaults to atomic for multi‑span; you can explicitly set `options.applyMode` if needed. - ---- - -## Output Rules (JUnit fragments only) -- For each test, create **one** file: `reports/_results.xml` containing exactly a single ` ... `. - Put human-readable lines (PLAN/PROGRESS/evidence) **inside** ``. - - If content contains `]]>`, split CDATA: replace `]]>` with `]]]]>`. -- Evidence windows only (±20–40 lines). If showing a unified diff, cap at 100 lines and note truncation. -- **Never** open/patch `$JUNIT_OUT` or `$MD_OUT`; CI merges fragments and synthesizes Markdown. 
- - Write destinations must match: `^reports/[A-Za-z0-9._-]+_results\.xml$` - - Snapshot files must live under `reports/_snapshots/` - - Reject absolute paths and any path containing `..` - - Reject control characters and line breaks in filenames; enforce UTF‑8 - - Cap basename length to ≤64 chars; cap any path segment to ≤100 and total path length to ≤255 - - Bash(printf|echo) must write to stdout only. Do not use shell redirection, here‑docs, or `tee` to create/modify files. The only allowed FS mutation is via `scripts/nlt-revert.sh`. - -**Example fragment** -```xml - - -... evidence windows ... -VERDICT: PASS -]]> - - -``` - -Note: Emit the PLAN line only in NL‑0 (do not repeat it for later tests). - - -### Fast Restore Strategy (OS‑level) - -- Snapshot once at NL‑0, then restore after each test via the helper. -- Snapshot (once after confirming the target): - ```bash - scripts/nlt-revert.sh snapshot "TestProjects/UnityMCPTests/Assets/Scripts/LongUnityScriptClaudeTest.cs" "reports/_snapshots/LongUnityScriptClaudeTest.cs.baseline" - ``` -- Log `snapshot_sha=...` printed by the script. -- Restore (after each mutating test): - ```bash - scripts/nlt-revert.sh restore "TestProjects/UnityMCPTests/Assets/Scripts/LongUnityScriptClaudeTest.cs" "reports/_snapshots/LongUnityScriptClaudeTest.cs.baseline" - ``` -- Then `read_resource` to confirm and (optionally) `validate_script(level:"standard")`. -- If the helper fails: fall back once to a guarded full‑file restore using the baseline bytes; then continue. - -### Guarded Write Pattern (for edits, not restores) - -- Before any mutation: `res = mcp__unity__read_resource(uri)`; `pre_sha = sha256(res.bytes)`. -- Write with `precondition_sha256 = pre_sha` on `apply_text_edits`/`script_apply_edits`. -- To compute `pre_sha` without reading file contents, you may instead call `mcp__unity__get_sha(uri).sha256`. 
-- On `{status:"stale_file"}`: - - Retry once using the server-provided hash (e.g., `data.current_sha256` or `data.expected_sha256`, per API schema). - - If absent, one re-read then a final retry. No loops. -- After success: immediately re-read via `res2 = mcp__unity__read_resource(uri)` and set `pre_sha = sha256(res2.bytes)` before any further edits in the same test. -- Prefer anchors (`script_apply_edits`) for end-of-class / above-method insertions. Keep edits inside method bodies. Avoid header/using. - -**On non‑JSON/transport errors (timeout, EOF, connection closed):** -- Write `reports/_results.xml` with a `` that includes a `` or `` node capturing the error text. -- Run the OS restore via `scripts/nlt-revert.sh restore …`. -- Continue to the next test (do not abort). - -**If any write returns `bad_request`, or `unsupported` after a fallback attempt:** -- Write `reports/_results.xml` with a `` that includes a `` node capturing the server error, include evidence, and end with `VERDICT: FAIL`. -- Run `scripts/nlt-revert.sh restore ...` and continue to the next test. -### Execution Order (fixed) - -- Run exactly: NL-0, NL-1, NL-2, NL-3, NL-4, T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J (15 total). -- Before NL-1..T-J: Bash(scripts/nlt-revert.sh:restore "" "reports/_snapshots/LongUnityScriptClaudeTest.cs.baseline") IF the baseline exists; skip for NL-0. -- NL-0 must include the PLAN line (len=15). -- After each testcase, include `PROGRESS: /15 completed`. - - -### Test Specs (concise) - -- NL‑0. Sanity reads — Tail ~120; ±40 around `Update()`. Then snapshot via helper. -- NL‑1. Replace/insert/delete — `HasTarget → return currentTarget != null;`; insert `PrintSeries()` after `GetCurrentTarget` logging "1,2,3"; verify; delete `PrintSeries()`; restore. -- NL‑2. Anchor comment — Insert `// Build marker OK` above `public void Update(...)`; restore. -- NL‑3. End‑of‑class — Insert `// Tail test A/B/C` (3 lines) before final brace; restore. -- NL‑4. 
Compile trigger — Record INFO only. - -### T‑A. Anchor insert (text path) — Insert helper after `GetCurrentTarget`; verify; delete via `regex_replace`; restore. -### T‑B. Replace body — Single `replace_range` inside `HasTarget`; restore. -- Options: pass {"validate":"relaxed"} for interior one-line edits. -### T‑C. Header/region preservation — Edit interior of `ApplyBlend`; preserve signature/docs/regions; restore. -- Options: pass {"validate":"relaxed"} for interior one-line edits. -### T‑D. End‑of‑class (anchor) — Insert helper before final brace; remove; restore. -### T‑E. Lifecycle — Insert → update → delete via regex; restore. -### T‑F. Atomic batch — One `mcp__unity__apply_text_edits` call (text ranges only) - - Compute all three edits from the **same fresh read**: - 1) Two small interior `replace_range` tweaks. - 2) One **end‑of‑class insertion**: find the **index of the final `}`** for the class; create a zero‑width range `[idx, idx)` and set `replacement` to the 3‑line comment block. - - Send all three ranges in **one call**, sorted **descending by start index** to avoid offset drift. - - Expect all‑or‑nothing semantics; on `{status:"overlap"}` or `{status:"bad_request"}`, write the testcase fragment with ``, **restore**, and continue. - - Options: pass {"applyMode":"atomic"} to enforce all‑or‑nothing. -- T‑G. Path normalization — Make the same edit with `unity://path/Assets/...` then `Assets/...`. Without refreshing `precondition_sha256`, the second attempt returns `{stale_file}`; retry with the server-provided hash to confirm both forms resolve to the same file. - -### T-H. Validation (standard) -- Restore baseline (helper call above). -- Perform a harmless interior tweak (or none), then MUST call: - mcp__unity__validate_script(level:"standard") -- Write the validator output to system-out; VERDICT: PASS if standard is clean, else include with the validator message and continue. - -### T-I. Failure surfaces (expected) -- Restore baseline. 
-- (1) OVERLAP: - * Fresh read of file; compute two interior ranges that overlap inside HasTarget. - * Prefer LSP ranges (0‑based) or explicit 1‑based fields; ensure both spans come from the same snapshot. - * Single mcp__unity__apply_text_edits call with both ranges. - * Expect `{status:"overlap"}` (SDK preflight) → record as PASS; else FAIL. Restore. -- (2) STALE_FILE: - * Fresh read → pre_sha. - * Make a tiny legit edit with pre_sha; success. - * Attempt another edit reusing the OLD pre_sha. - * Expect {status:"stale_file"} → record as PASS; else FAIL. Re-read to refresh, restore. - -### Per‑test error handling and recovery -- For each test (NL‑0..T‑J), use a try/finally pattern: - - Always write a testcase fragment and perform restore in finally, even when tools return error payloads. - - try: run the test steps; always write `reports/_results.xml` with PASS/FAIL/ERROR - - finally: run Bash(scripts/nlt-revert.sh:restore …baseline) to restore the target file -- On any transport/JSON/tool exception: - - catch and write a `` fragment with an `` node (include the message), then proceed to the next test. -- After NL‑4 completes, proceed directly to T‑A regardless of any earlier validator warnings (do not abort the run). -- (3) USING_GUARD (optional): - * Attempt a 1-line insert above the first 'using'. - * Expect {status:"using_guard"} → record as PASS; else note 'not emitted'. Restore. - -### T-J. Idempotency -- Restore baseline. -- Repeat a replace_range twice (second call may be noop). Validate standard after each. -- Insert or ensure a tiny comment, then delete it twice (second delete may be noop). -- Restore and PASS unless an error/structural break occurred. - - -### Status & Reporting - -- Safeguard statuses are non‑fatal; record and continue. -- End each testcase `` with `VERDICT: PASS` or `VERDICT: FAIL`. 
\ No newline at end of file diff --git a/.github/workflows/claude-nl-suite-mini.yml b/.github/workflows/claude-nl-suite-mini.yml deleted file mode 100644 index 272e04d6..00000000 --- a/.github/workflows/claude-nl-suite-mini.yml +++ /dev/null @@ -1,356 +0,0 @@ -name: Claude Mini NL Test Suite (Unity live) - -on: - workflow_dispatch: {} - -permissions: - contents: read - checks: write - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - UNITY_VERSION: 2021.3.45f1 - UNITY_IMAGE: unityci/editor:ubuntu-2021.3.45f1-linux-il2cpp-3 - UNITY_CACHE_ROOT: /home/runner/work/_temp/_github_home - -jobs: - nl-suite: - if: github.event_name == 'workflow_dispatch' - runs-on: ubuntu-latest - timeout-minutes: 60 - env: - JUNIT_OUT: reports/junit-nl-suite.xml - MD_OUT: reports/junit-nl-suite.md - - steps: - # ---------- Detect secrets ---------- - - name: Detect secrets (outputs) - id: detect - env: - UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }} - UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} - UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} - UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - run: | - set -e - if [ -n "$ANTHROPIC_API_KEY" ]; then echo "anthropic_ok=true" >> "$GITHUB_OUTPUT"; else echo "anthropic_ok=false" >> "$GITHUB_OUTPUT"; fi - if [ -n "$UNITY_LICENSE" ] || { [ -n "$UNITY_EMAIL" ] && [ -n "$UNITY_PASSWORD" ]; } || [ -n "$UNITY_SERIAL" ]; then - echo "unity_ok=true" >> "$GITHUB_OUTPUT" - else - echo "unity_ok=false" >> "$GITHUB_OUTPUT" - fi - - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - # ---------- Python env for MCP server (uv) ---------- - - uses: astral-sh/setup-uv@v4 - with: - python-version: '3.11' - - - name: Install MCP server - run: | - set -eux - uv venv - echo "VIRTUAL_ENV=$GITHUB_WORKSPACE/.venv" >> "$GITHUB_ENV" - echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH" - if [ -f UnityMcpBridge/UnityMcpServer~/src/pyproject.toml ]; then - uv pip install -e 
UnityMcpBridge/UnityMcpServer~/src - elif [ -f UnityMcpBridge/UnityMcpServer~/src/requirements.txt ]; then - uv pip install -r UnityMcpBridge/UnityMcpServer~/src/requirements.txt - elif [ -f UnityMcpBridge/UnityMcpServer~/pyproject.toml ]; then - uv pip install -e UnityMcpBridge/UnityMcpServer~/ - elif [ -f UnityMcpBridge/UnityMcpServer~/requirements.txt ]; then - uv pip install -r UnityMcpBridge/UnityMcpServer~/requirements.txt - else - echo "No MCP Python deps found (skipping)" - fi - - # ---------- License prime on host (handles ULF or EBL) ---------- - - name: Prime Unity license on host (GameCI) - if: steps.detect.outputs.unity_ok == 'true' - uses: game-ci/unity-test-runner@v4 - env: - UNITY_LICENSE: ${{ secrets.UNITY_LICENSE }} - UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} - UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} - UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} - with: - projectPath: TestProjects/UnityMCPTests - testMode: EditMode - customParameters: -runTests -testFilter __NoSuchTest__ -batchmode -nographics - unityVersion: ${{ env.UNITY_VERSION }} - - # (Optional) Show where the license actually got written - - name: Inspect GameCI license caches (host) - if: steps.detect.outputs.unity_ok == 'true' - run: | - set -eux - find "${{ env.UNITY_CACHE_ROOT }}" -maxdepth 4 \( -path "*/.cache" -prune -o -type f \( -name '*.ulf' -o -name 'user.json' \) -print \) 2>/dev/null || true - - # ---------- Clean any stale MCP status from previous runs ---------- - - name: Clean old MCP status - run: | - set -eux - mkdir -p "$HOME/.unity-mcp" - rm -f "$HOME/.unity-mcp"/unity-mcp-status-*.json || true - - # ---------- Start headless Unity that stays up (bridge enabled) ---------- - - name: Start Unity (persistent bridge) - if: steps.detect.outputs.unity_ok == 'true' - env: - UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} - UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} - UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} - run: | - set -eu - if [ ! 
-d "${{ github.workspace }}/TestProjects/UnityMCPTests/ProjectSettings" ]; then - echo "Unity project not found; failing fast." - exit 1 - fi - mkdir -p "$HOME/.unity-mcp" - MANUAL_ARG=() - if [ -f "${UNITY_CACHE_ROOT}/.local/share/unity3d/Unity_lic.ulf" ]; then - MANUAL_ARG=(-manualLicenseFile /root/.local/share/unity3d/Unity_lic.ulf) - fi - EBL_ARGS=() - [ -n "${UNITY_SERIAL:-}" ] && EBL_ARGS+=(-serial "$UNITY_SERIAL") - [ -n "${UNITY_EMAIL:-}" ] && EBL_ARGS+=(-username "$UNITY_EMAIL") - [ -n "${UNITY_PASSWORD:-}" ] && EBL_ARGS+=(-password "$UNITY_PASSWORD") - docker rm -f unity-mcp >/dev/null 2>&1 || true - docker run -d --name unity-mcp --network host \ - -e HOME=/root \ - -e UNITY_MCP_ALLOW_BATCH=1 -e UNITY_MCP_STATUS_DIR=/root/.unity-mcp \ - -e UNITY_MCP_BIND_HOST=127.0.0.1 \ - -v "${{ github.workspace }}:/workspace" -w /workspace \ - -v "${{ env.UNITY_CACHE_ROOT }}:/root" \ - -v "$HOME/.unity-mcp:/root/.unity-mcp" \ - ${{ env.UNITY_IMAGE }} /opt/unity/Editor/Unity -batchmode -nographics -logFile - \ - -stackTraceLogType Full \ - -projectPath /workspace/TestProjects/UnityMCPTests \ - "${MANUAL_ARG[@]}" \ - "${EBL_ARGS[@]}" \ - -executeMethod MCPForUnity.Editor.MCPForUnityBridge.StartAutoConnect - - # ---------- Wait for Unity bridge (fail fast if not running/ready) ---------- - - name: Wait for Unity bridge (robust) - if: steps.detect.outputs.unity_ok == 'true' - run: | - set -euo pipefail - if ! docker ps --format '{{.Names}}' | grep -qx 'unity-mcp'; then - echo "Unity container failed to start"; docker ps -a || true; exit 1 - fi - docker logs -f unity-mcp 2>&1 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' & LOGPID=$! 
- deadline=$((SECONDS+420)); READY=0 - try_connect_host() { - P="$1" - timeout 1 bash -lc "exec 3<>/dev/tcp/127.0.0.1/$P; head -c 8 <&3 >/dev/null" && return 0 || true - if command -v nc >/dev/null 2>&1; then nc -6 -z ::1 "$P" && return 0 || true; fi - return 1 - } - - # in-container probe will try IPv4 then IPv6 via nc or /dev/tcp - - while [ $SECONDS -lt $deadline ]; do - if docker logs unity-mcp 2>&1 | grep -qE "MCP Bridge listening|Bridge ready|Server started"; then - READY=1; echo "Bridge ready (log markers)"; break - fi - PORT=$(python -c "import os,glob,json,sys,time; b=os.path.expanduser('~/.unity-mcp'); fs=sorted(glob.glob(os.path.join(b,'unity-mcp-status-*.json')), key=os.path.getmtime, reverse=True); print(next((json.load(open(f,'r',encoding='utf-8')).get('unity_port') for f in fs if time.time()-os.path.getmtime(f)<=300 and json.load(open(f,'r',encoding='utf-8')).get('unity_port')), '' ))" 2>/dev/null || true) - if [ -n "${PORT:-}" ] && { try_connect_host "$PORT" || docker exec unity-mcp bash -lc "timeout 1 bash -lc 'exec 3<>/dev/tcp/127.0.0.1/$PORT' || (command -v nc >/dev/null 2>&1 && nc -6 -z ::1 $PORT)"; }; then - READY=1; echo "Bridge ready on port $PORT"; break - fi - if docker logs unity-mcp 2>&1 | grep -qE "No valid Unity Editor license|Token not found in cache|com\.unity\.editor\.headless"; then - echo "Licensing error detected"; break - fi - sleep 2 - done - - kill $LOGPID || true - - if [ "$READY" != "1" ]; then - echo "Bridge not ready; diagnostics:" - echo "== status files =="; ls -la "$HOME/.unity-mcp" || true - echo "== status contents =="; for f in "$HOME"/.unity-mcp/unity-mcp-status-*.json; do [ -f "$f" ] && { echo "--- $f"; sed -n '1,120p' "$f"; }; done - echo "== sockets (inside container) =="; docker exec unity-mcp bash -lc 'ss -lntp || netstat -tulpen || true' - echo "== tail of Unity log ==" - docker logs --tail 200 unity-mcp | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true - exit 1 - fi - - # 
---------- Make MCP config available to the action ---------- - - name: Write MCP config (.claude/mcp.json) - run: | - set -eux - mkdir -p .claude - cat > .claude/mcp.json < str: - return tag.rsplit('}', 1)[-1] if '}' in tag else tag - - src = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml')) - out = Path('reports/junit-for-actions.xml') - out.parent.mkdir(parents=True, exist_ok=True) - - if not src.exists(): - # Try to use any existing XML as a source (e.g., claude-nl-tests.xml) - candidates = sorted(Path('reports').glob('*.xml')) - if candidates: - src = candidates[0] - else: - print("WARN: no XML source found for normalization") - - if src.exists(): - try: - root = ET.parse(src).getroot() - rtag = localname(root.tag) - if rtag == 'testsuites' and len(root) == 1 and localname(root[0].tag) == 'testsuite': - ET.ElementTree(root[0]).write(out, encoding='utf-8', xml_declaration=True) - else: - out.write_bytes(src.read_bytes()) - except Exception as e: - print("Normalization error:", e) - out.write_bytes(src.read_bytes()) - - # Always create a second copy with a junit-* name so wildcard patterns match too - if out.exists(): - Path('reports/junit-nl-suite-copy.xml').write_bytes(out.read_bytes()) - PY - - - name: "Debug: list report files" - if: always() - shell: bash - run: | - set -eux - ls -la reports || true - shopt -s nullglob - for f in reports/*.xml; do - echo "===== $f =====" - head -n 40 "$f" || true - done - - - # sanitize only the markdown (does not touch JUnit xml) - - name: Sanitize markdown (all shards) - if: always() - run: | - set -eu - python - <<'PY' - from pathlib import Path - rp=Path('reports') - rp.mkdir(parents=True, exist_ok=True) - for p in rp.glob('*.md'): - b=p.read_bytes().replace(b'\x00', b'') - s=b.decode('utf-8','replace').replace('\r\n','\n') - p.write_text(s, encoding='utf-8', newline='\n') - PY - - - name: NL/T details → Job Summary - if: always() - run: | - echo "## Unity NL/T Editing Suite — Full Coverage" >> 
$GITHUB_STEP_SUMMARY - python - <<'PY' >> $GITHUB_STEP_SUMMARY - from pathlib import Path - p = Path('reports/junit-nl-suite.md') if Path('reports/junit-nl-suite.md').exists() else Path('reports/claude-nl-tests.md') - if p.exists(): - text = p.read_bytes().decode('utf-8', 'replace') - MAX = 65000 - print(text[:MAX]) - if len(text) > MAX: - print("\n\n_…truncated in summary; full report is in artifacts._") - else: - print("_No markdown report found._") - PY - - - name: Fallback JUnit if missing - if: always() - run: | - set -eu - mkdir -p reports - if [ ! -f reports/junit-for-actions.xml ]; then - printf '%s\n' \ - '' \ - '' \ - ' ' \ - ' ' \ - ' ' \ - '' \ - > reports/junit-for-actions.xml - fi - - - - name: Publish JUnit reports - if: always() - uses: mikepenz/action-junit-report@v5 - with: - report_paths: 'reports/junit-for-actions.xml' - include_passed: true - detailed_summary: true - annotate_notice: true - require_tests: false - fail_on_parse_error: true - - - name: Upload artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: claude-nl-suite-artifacts - path: reports/** - - # ---------- Always stop Unity ---------- - - name: Stop Unity - if: always() - run: | - docker logs --tail 400 unity-mcp | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true - docker rm -f unity-mcp || true diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index b4c86d92..0d9df165 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -227,7 +227,7 @@ jobs: # Fail fast only if container actually died st="$(docker inspect -f '{{.State.Status}} {{.State.ExitCode}}' unity-mcp 2>/dev/null || true)" - case "$st" in exited*|dead*) docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig'; exit 1;; esac + case "$st" in exited*|dead*) docker logs unity-mcp --tail 200 | sed -E 
's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig'; exit 1;; esac # Patterns ok_pat='(Bridge|MCP(For)?Unity|AutoConnect).*(listening|ready|started|port|bound)' @@ -253,14 +253,14 @@ jobs: # Only treat license failures as fatal *after* warm-up if [ $SECONDS -ge $fatal_after ] && echo "$logs" | grep -qiE "$license_fatal"; then echo "::error::Fatal licensing signal detected after warm-up" - echo "$logs" | tail -n 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' + echo "$logs" | tail -n 200 | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' exit 1 fi # If the container dies mid-wait, bail st="$(docker inspect -f '{{.State.Status}}' unity-mcp 2>/dev/null || true)" if [[ "$st" != "running" ]]; then - echo "::error::Unity container exited during wait"; docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' + echo "::error::Unity container exited during wait"; docker logs unity-mcp --tail 200 | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' exit 1 fi @@ -268,7 +268,7 @@ jobs: done echo "::error::Bridge not ready before deadline" - docker logs unity-mcp --tail 200 | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' + docker logs unity-mcp --tail 200 | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/Ig' exit 1 # (moved) — return license after Unity is stopped @@ -800,7 +800,7 @@ jobs: - name: Stop Unity if: always() run: | - docker logs --tail 400 unity-mcp | sed -E 's/((serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true + docker logs --tail 400 unity-mcp | sed -E 's/((email|serial|license|password|token)[^[:space:]]*)/[REDACTED]/ig' || true docker rm -f unity-mcp || true - name: Return Pro license (if used) From 8234a5df100eea04680bc1cc020804d1985b8fd0 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Sat, 6 Sep 2025 10:13:44 -0700 Subject: [PATCH 
25/28] NL/T prompt: enforce allowed ops, require per-test fragment emission (incl. failures), add T-F..T-J XML templates --- .../prompts/nl-unity-suite-full-additive.md | 72 ++++++++++++++++++- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index 9d98ecd7..70839a42 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -31,7 +31,7 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un - If test fails, include: `` - TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4, T-A, T-B, T-C, T-D, T-E, T-F, T-G, T-H, T-I, T-J 5) **NO RESTORATION** - tests build additively on previous state. -6) **STRICT FRAGMENT EMISSION** - After completing T-D and T-J, immediately emit a clean XML file under `reports/_results.xml` with exactly one `` whose `name` begins with the exact test id. No prologue/epilogue or fences. +6) **STRICT FRAGMENT EMISSION** - After each test, immediately emit a clean XML file under `reports/_results.xml` with exactly one `` whose `name` begins with the exact test id. No prologue/epilogue or fences. If the test fails, include a `` and still emit. --- @@ -52,6 +52,10 @@ CI provides: - Allowed ops: `anchor_insert`, `replace_method`, `insert_method`, `delete_method`, `regex_replace` - For `anchor_insert`, always set `"position": "before"` or `"after"`. - **Precise ranges / atomic batch**: `mcp__unity__apply_text_edits` (non‑overlapping ranges) +STRICT OP GUARDRAILS +- Do not use `anchor_replace`. Structured edits must be one of: `anchor_insert`, `replace_method`, `insert_method`, `delete_method`, `regex_replace`. +- For multi‑spot textual tweaks in one operation, compute non‑overlapping ranges with `mcp__unity__find_in_file` and use `mcp__unity__apply_text_edits`. 
+ - **Hash-only**: `mcp__unity__get_sha` — returns `{sha256,lengthBytes,lastModifiedUtc}` without file body - **Validation**: `mcp__unity__validate_script(level:"standard")` - **Dynamic targeting**: Use `mcp__unity__find_in_file` to locate current positions of methods/markers @@ -250,7 +254,8 @@ find_in_file(pattern: "public bool HasTarget\\(\\)") 1. Verify expected content exists: `find_in_file` for key markers 2. Check structural integrity: `validate_script(level:"standard")` 3. Update SHA tracking for next test's preconditions -4. Log cumulative changes in test evidence +4. Emit a per‑test fragment to `reports/_results.xml` immediately. If the test failed, still write a single `` with a `` and evidence in `system-out`. +5. Log cumulative changes in test evidence **Error Recovery:** - If test fails, log current state but continue (don't restore) @@ -273,4 +278,65 @@ This additive approach produces a more realistic and maintainable test suite tha --- BAN ON EXTRA TOOLS AND DIRS -- Do not use any tools outside `AllowedTools`. Do not create directories; assume `reports/` exists. \ No newline at end of file +- Do not use any tools outside `AllowedTools`. Do not create directories; assume `reports/` exists. + +--- + +## XML Fragment Templates (T-F .. T-J) + +Use these skeletons verbatim as a starting point. Replace the bracketed placeholders with your evidence and the latest SHA. Ensure each file contains exactly one `` element and that the `name` begins with the exact test id. 
+ +```xml + + + +``` + +```xml + + + +``` + +```xml + + + +``` + +```xml + + + +``` + +```xml + + + +``` \ No newline at end of file From c92f6051e82d6c1853c2ca8267d92d87df59e094 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Sat, 6 Sep 2025 10:33:23 -0700 Subject: [PATCH 26/28] NL suite: enforce strict NL-4 emission; remove brittle relabeling; keep canonicalization + backfill --- .claude/prompts/nl-unity-suite-full-additive.md | 11 +++++++++++ .github/workflows/claude-nl-suite.yml | 8 ++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index 70839a42..78681ea5 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -125,6 +125,7 @@ STRICT OP GUARDRAILS - Read Unity console messages (INFO level) - Validate no compilation errors from previous operations - **Expected final state**: State C (unchanged) + - **IMMEDIATELY** write clean XML fragment to `reports/NL-4_results.xml` (no extra text). The `` must start with `NL-4`. Include brief evidence (e.g., a few recent console lines or an explicit "no compile errors" note) in `system-out`. ### T-A. Temporary Helper Lifecycle (Returns to State C) **Goal**: Test insert → verify → delete cycle for temporary code @@ -299,6 +300,16 @@ SHA: [sha-here] ``` +```xml + + + +``` + ```xml "{new}"') + + # Note: Do not auto-relabel fragments. We rely on per-test strict emission + # and the backfill step to surface missing tests explicitly.
PY - name: Backfill missing NL/T tests (fail placeholders) From 25985160a60758a010c45fa865c408ad3fbf04a1 Mon Sep 17 00:00:00 2001 From: dsarno Date: Sat, 6 Sep 2025 12:44:29 -0700 Subject: [PATCH 27/28] NL/T: minimize transcript; tighten NL-4 console reads; add final errors scan in T-J --- .../prompts/nl-unity-suite-full-additive.md | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-full-additive.md b/.claude/prompts/nl-unity-suite-full-additive.md index 78681ea5..ba0fbef5 100644 --- a/.claude/prompts/nl-unity-suite-full-additive.md +++ b/.claude/prompts/nl-unity-suite-full-additive.md @@ -47,6 +47,18 @@ CI provides: --- +## Transcript Minimization Rules +- Do not restate tool JSON; summarize in ≤ 2 short lines. +- Never paste full file contents. For matches, include only the matched line and ±1 line. +- Prefer `mcp__unity__find_in_file` for targeting; avoid `mcp__unity__read_resource` unless strictly necessary. If needed, limit to `head_bytes ≤ 256` or `tail_lines ≤ 10`. +- Per‑test `system-out` ≤ 400 chars: brief status + latest SHA only. +- Console evidence: fetch the last 10 lines and include ≤ 3 lines in the fragment. +- Avoid quoting multi‑line diffs; reference markers instead. +- Console scans: perform two reads — last 10 `log/info` lines and up to 3 `error` entries; include ≤ 3 lines total in the fragment; if no errors, state "no errors". +- Final check is folded into T‑J: perform an errors‑only scan and include a single "no errors" line or up to 3 error lines within the T‑J fragment. + +--- + ## Tool Mapping - **Anchors/regex/structured**: `mcp__unity__script_apply_edits` - Allowed ops: `anchor_insert`, `replace_method`, `insert_method`, `delete_method`, `regex_replace` @@ -122,10 +134,11 @@ STRICT OP GUARDRAILS ### NL-4.
Console State Verification (No State Change) **Goal**: Verify Unity console integration without file modification **Actions**: -- Read Unity console messages (INFO level) +- Read last 10 Unity console lines (log/info) +- Perform a targeted scan for errors/exceptions (type: errors), up to 3 entries - Validate no compilation errors from previous operations - **Expected final state**: State C (unchanged) - - **IMMEDIATELY** write clean XML fragment to `reports/NL-4_results.xml` (no extra text). The `` must start with `NL-4`. Include brief evidence (e.g., a few recent console lines or an explicit "no compile errors" note) in `system-out`. +- **IMMEDIATELY** write clean XML fragment to `reports/NL-4_results.xml` (no extra text). The `` must start with `NL-4`. Include at most 3 lines total across both reads, or simply state "no errors; console OK" (≤ 400 chars), plus the latest SHA. ### T-A. Temporary Helper Lifecycle (Returns to State C) **Goal**: Test insert → verify → delete cycle for temporary code @@ -218,7 +231,8 @@ STRICT OP GUARDRAILS - **Remove (structured)**: `{"op":"regex_replace","pattern":"(?m)^\\s*// idempotency test marker\\r?\\n?","text":""}` - **Remove again** (same `regex_replace`) → expect `no_op: true`. - `mcp__unity__validate_script(level:"standard")` -- **IMMEDIATELY** write clean XML fragment to `reports/T-J_results.xml` with evidence of both `no_op: true` outcomes. The `` must start with `T-J` and include the latest SHA. +- Perform a final console scan for errors/exceptions (errors only, up to 3); include "no errors" if none +- **IMMEDIATELY** write clean XML fragment to `reports/T-J_results.xml` with evidence of both `no_op: true` outcomes and the console result. The `` must start with `T-J` and include the latest SHA. - **Expected final state**: State H + verified idempotent behavior --- @@ -256,7 +270,7 @@ find_in_file(pattern: "public bool HasTarget\\(\\)") 2. Check structural integrity: `validate_script(level:"standard")` 3. 
Update SHA tracking for next test's preconditions 4. Emit a per‑test fragment to `reports/_results.xml` immediately. If the test failed, still write a single `` with a `` and evidence in `system-out`. -5. Log cumulative changes in test evidence +5. Log cumulative changes in test evidence (keep concise per Transcript Minimization Rules; never paste raw tool JSON) **Error Recovery:** - If test fails, log current state but continue (don't restore) From 7a73d988e691dbe9903c92fa60aed37d74bc8f49 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Sun, 7 Sep 2025 13:44:12 -0700 Subject: [PATCH 28/28] CI: add staged report fragment promotion step (reports/_staging -> reports/) to support multi-edit reporting --- .github/workflows/claude-nl-suite.yml | 42 ++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 2fdef6d4..6cc5db30 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -300,7 +300,7 @@ jobs: run: | set -eux rm -f reports/*.xml reports/*.md || true - mkdir -p reports reports/_snapshots + mkdir -p reports reports/_snapshots reports/_staging - name: Create report skeletons run: | @@ -343,6 +343,46 @@ jobs: timeout_minutes: "30" anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + - name: Finalize staged report fragments (promote to reports/) + if: always() + shell: bash + run: | + python3 - <<'PY' + from pathlib import Path + import xml.etree.ElementTree as ET, re + + def id_from_filename(p: Path): + n = p.name + m = re.match(r'NL(\d+)_results\.xml$', n, re.I) + if m: + return f"NL-{int(m.group(1))}" + m = re.match(r'T([A-J])_results\.xml$', n, re.I) + if m: + return f"T-{m.group(1).upper()}" + return None + + src_dir = Path('reports/_staging') + dst_dir = Path('reports') + dst_dir.mkdir(parents=True, exist_ok=True) + + for frag in sorted(src_dir.glob('*_results.xml')): + try: + tree = ET.parse(frag); root = 
tree.getroot() + except Exception: + continue + if root.tag != 'testcase': + continue + file_id = id_from_filename(frag) + old = root.get('name') or '' + if file_id: + title = re.sub(r'^\s*(NL-\d+|T-[A-Z])\s*[—–:\-]\s*', '', old).strip() + new = f"{file_id} — {title}" if title else file_id + if new != old and new: + root.set('name', new) + out = dst_dir / frag.name + tree.write(out, encoding='utf-8', xml_declaration=False) + PY + - name: Canonicalize testcase names (NL/T prefixes) if: always() shell: bash