Harden permission-reject E2E tests across all SDKs (#1194) (#1317)

stephentoub · Copilot · web-flow · commit a623d0727622 · 2026-05-16T22:44:55.000-04:00
The existing "reject permission" E2E test in every SDK asserted only that the target file was unchanged after a model-driven edit attempt. That is a false-positive-prone check: it passes even when the agent freezes, when the permission decision is silently dropped, or when the wrong discriminator is sent. Specifically, it would not have caught the .NET `PermissionDecision` empty-JSON regression reported in #1194 (since fixed in codegen). Strengthen each SDK's reject test to additionally assert that the CLI emits a `tool.execution_complete` event whose error message contains "user rejected" (case-insensitive). The CLI emits a kind-specific message for the reject decision ("The user rejected this tool call.") vs. the distinct message for user-not-available, so this asserts that the specific `reject` discriminator round-tripped end-to-end - exactly the property that was broken in #1194. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/dotnet/test/E2E/PermissionE2ETests.cs b/dotnet/test/E2E/PermissionE2ETests.cs
@@ -93,6 +93,23 @@ public async Task Should_Deny_Permission_When_Handler_Returns_Denied()
             }
         });
 
+        // Regression check for https://github.com/github/copilot-sdk/issues/1194:
+        // the reject decision must round-trip through the CLI with its discriminator
+        // intact so the agent surfaces the user-rejected error to the model. The
+        // CLI uses a kind-specific error message ("The user rejected this tool call.")
+        // for the reject decision, which lets us assert the decision was honored
+        // — not merely that the operation didn't happen.
+        var userRejectedToolCall = false;
+        session.On(evt =>
+        {
+            if (evt is ToolExecutionCompleteEvent toolEvt &&
+                !toolEvt.Data.Success &&
+                toolEvt.Data.Error?.Message.Contains("user rejected", StringComparison.OrdinalIgnoreCase) == true)
+            {
+                userRejectedToolCall = true;
+            }
+        });
+
         var testFilePath = Path.Combine(Ctx.WorkDir, "protected.txt");
         await File.WriteAllTextAsync(testFilePath, "protected content");
 
@@ -103,6 +120,10 @@ await session.SendAsync(new MessageOptions
 
         await TestHelper.GetFinalAssistantMessageAsync(session);
 
+        Assert.True(
+            userRejectedToolCall,
+            "Expected a tool.execution_complete event whose error indicates the user rejected the call.");
+
         // Verify the file was NOT modified
         var content = await File.ReadAllTextAsync(testFilePath);
         Assert.Equal("protected content", content);
diff --git a/go/internal/e2e/permissions_e2e_test.go b/go/internal/e2e/permissions_e2e_test.go
@@ -130,6 +130,26 @@ func TestPermissionsE2E(t *testing.T) {
 			t.Fatalf("Failed to create session: %v", err)
 		}
 
+		// Regression check for https://github.com/github/copilot-sdk/issues/1194:
+		// the reject decision must round-trip through the CLI with its discriminator
+		// intact so the agent surfaces the user-rejected error to the model. The
+		// CLI emits a kind-specific error message ("The user rejected this tool call.")
+		// for the reject decision, which lets us assert the decision was honored
+		// — not merely that the operation didn't happen.
+		var mu sync.Mutex
+		userRejectedToolCall := false
+
+		session.On(func(event copilot.SessionEvent) {
+			if d, ok := event.Data.(*copilot.ToolExecutionCompleteData); ok &&
+				!d.Success &&
+				d.Error != nil &&
+				strings.Contains(strings.ToLower(d.Error.Message), "user rejected") {
+				mu.Lock()
+				userRejectedToolCall = true
+				mu.Unlock()
+			}
+		})
+
 		testFile := filepath.Join(ctx.WorkDir, "protected.txt")
 		originalContent := []byte("protected content")
 		err = os.WriteFile(testFile, originalContent, 0644)
@@ -149,6 +169,12 @@ func TestPermissionsE2E(t *testing.T) {
 			t.Fatalf("Failed to get final message: %v", err)
 		}
 
+		mu.Lock()
+		if !userRejectedToolCall {
+			t.Error("Expected a tool.execution_complete event whose error indicates the user rejected the call.")
+		}
+		mu.Unlock()
+
 		// Verify the file was NOT modified
 		content, err := os.ReadFile(testFile)
 		if err != nil {
diff --git a/nodejs/test/e2e/permissions.e2e.test.ts b/nodejs/test/e2e/permissions.e2e.test.ts
@@ -55,6 +55,23 @@ describe("Permission callbacks", async () => {
             },
         });
 
+        // Regression check for https://github.com/github/copilot-sdk/issues/1194:
+        // the reject decision must round-trip through the CLI with its discriminator
+        // intact so the agent surfaces the user-rejected error to the model. The
+        // CLI emits a kind-specific error message ("The user rejected this tool call.")
+        // for the reject decision, which lets us assert the decision was honored
+        // — not merely that the operation didn't happen.
+        let userRejectedToolCall = false;
+        session.on((event) => {
+            if (
+                event.type === "tool.execution_complete" &&
+                !event.data.success &&
+                event.data.error?.message.toLowerCase().includes("user rejected")
+            ) {
+                userRejectedToolCall = true;
+            }
+        });
+
         const originalContent = "protected content";
         const testFile = join(workDir, "protected.txt");
         await writeFile(testFile, originalContent);
@@ -63,6 +80,8 @@ describe("Permission callbacks", async () => {
             prompt: "Edit protected.txt and replace 'protected' with 'hacked'.",
         });
 
+        expect(userRejectedToolCall).toBe(true);
+
         // Verify the file was NOT modified
         const content = await readFile(testFile, "utf-8");
         expect(content).toBe(originalContent);
diff --git a/python/e2e/test_permissions_e2e.py b/python/e2e/test_permissions_e2e.py
@@ -56,11 +56,35 @@ def on_permission_request(
 
         session = await ctx.client.create_session(on_permission_request=on_permission_request)
 
+        # Regression check for https://github.com/github/copilot-sdk/issues/1194:
+        # the reject decision must round-trip through the CLI with its discriminator
+        # intact so the agent surfaces the user-rejected error to the model. The
+        # CLI emits a kind-specific error message ("The user rejected this tool call.")
+        # for the reject decision, which lets us assert the decision was honored
+        # — not merely that the operation didn't happen.
+        user_rejected_events = []
+
+        def on_event(event):
+            match event.data:
+                case ToolExecutionCompleteData(success=False) as data:
+                    error = data.error
+                    msg = (
+                        error
+                        if isinstance(error, str)
+                        else (getattr(error, "message", None) if error is not None else None)
+                    )
+                    if msg and "user rejected" in msg.lower():
+                        user_rejected_events.append(event)
+
+        session.on(on_event)
+
         original_content = "protected content"
         write_file(ctx.work_dir, "protected.txt", original_content)
 
         await session.send_and_wait("Edit protected.txt and replace 'protected' with 'hacked'.")
 
+        assert len(user_rejected_events) > 0
+
         # Verify the file was NOT modified
         content = read_file(ctx.work_dir, "protected.txt")
         assert content == original_content
diff --git a/rust/tests/e2e/permissions.rs b/rust/tests/e2e/permissions.rs
@@ -83,11 +83,25 @@ async fn should_deny_permission_when_handler_returns_denied() {
                     .await
                     .expect("create session");
 
+                // Regression check for https://github.com/github/copilot-sdk/issues/1194:
+                // the reject decision must round-trip through the CLI with its
+                // discriminator intact so the agent surfaces the user-rejected error
+                // to the model. The CLI emits a kind-specific error message
+                // ("The user rejected this tool call.") for the reject decision,
+                // which lets us assert the decision was honored — not merely that
+                // the operation didn't happen.
+                let events = session.subscribe();
+
                 session
                     .send_and_wait("Edit protected.txt and replace 'protected' with 'hacked'.")
                     .await
                     .expect("send");
 
+                wait_for_event(events, "user-rejected tool completion", |event| {
+                    is_user_rejected_tool_completion(event)
+                })
+                .await;
+
                 let content = std::fs::read_to_string(&test_file).expect("read protected file");
                 assert_eq!(content, "protected content");
 
@@ -546,6 +560,21 @@ fn is_permission_denied_tool_completion(event: &github_copilot_sdk::SessionEvent
             .unwrap_or(false)
 }
 
+fn is_user_rejected_tool_completion(event: &github_copilot_sdk::SessionEvent) -> bool {
+    if event.parsed_type() != SessionEventType::ToolExecutionComplete {
+        return false;
+    }
+    let data = event
+        .typed_data::<ToolExecutionCompleteData>()
+        .expect("tool.execution_complete data");
+    !data.success
+        && data
+            .error
+            .as_ref()
+            .map(|error| error.message.to_lowercase().contains("user rejected"))
+            .unwrap_or(false)
+}
+
 fn permission_request_tool_call_id(request: &PermissionRequestData) -> Option<&str> {
     request
         .tool_call_id