Skip to content

Commit a623d07

Browse files
stephentoubCopilot
andauthored
Harden permission-reject E2E tests across all SDKs (#1194) (#1317)
The existing "reject permission" E2E test in every SDK asserted only that the target file was unchanged after a model-driven edit attempt. That is a false-positive-prone check: it passes even when the agent freezes, when the permission decision is silently dropped, or when the wrong discriminator is sent. Specifically, it would not have caught the .NET `PermissionDecision` empty-JSON regression reported in #1194 (since fixed in codegen). Strengthen each SDK's reject test to additionally assert that the CLI emits a `tool.execution_complete` event whose error message contains "user rejected" (case-insensitive). The CLI emits a kind-specific message for the reject decision ("The user rejected this tool call.") vs. the distinct message for user-not-available, so this asserts that the specific `reject` discriminator round-tripped end-to-end - exactly the property that was broken in #1194. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent fc11032 commit a623d07

5 files changed

Lines changed: 119 additions & 0 deletions

File tree

dotnet/test/E2E/PermissionE2ETests.cs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,23 @@ public async Task Should_Deny_Permission_When_Handler_Returns_Denied()
9393
}
9494
});
9595

96+
// Regression check for https://github.com/github/copilot-sdk/issues/1194:
97+
// the reject decision must round-trip through the CLI with its discriminator
98+
// intact so the agent surfaces the user-rejected error to the model. The
99+
// CLI uses a kind-specific error message ("The user rejected this tool call.")
100+
// for the reject decision, which lets us assert the decision was honored
101+
// — not merely that the operation didn't happen.
102+
var userRejectedToolCall = false;
103+
session.On(evt =>
104+
{
105+
if (evt is ToolExecutionCompleteEvent toolEvt &&
106+
!toolEvt.Data.Success &&
107+
toolEvt.Data.Error?.Message.Contains("user rejected", StringComparison.OrdinalIgnoreCase) == true)
108+
{
109+
userRejectedToolCall = true;
110+
}
111+
});
112+
96113
var testFilePath = Path.Combine(Ctx.WorkDir, "protected.txt");
97114
await File.WriteAllTextAsync(testFilePath, "protected content");
98115

@@ -103,6 +120,10 @@ await session.SendAsync(new MessageOptions
103120

104121
await TestHelper.GetFinalAssistantMessageAsync(session);
105122

123+
Assert.True(
124+
userRejectedToolCall,
125+
"Expected a tool.execution_complete event whose error indicates the user rejected the call.");
126+
106127
// Verify the file was NOT modified
107128
var content = await File.ReadAllTextAsync(testFilePath);
108129
Assert.Equal("protected content", content);

go/internal/e2e/permissions_e2e_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,26 @@ func TestPermissionsE2E(t *testing.T) {
130130
t.Fatalf("Failed to create session: %v", err)
131131
}
132132

133+
// Regression check for https://github.com/github/copilot-sdk/issues/1194:
134+
// the reject decision must round-trip through the CLI with its discriminator
135+
// intact so the agent surfaces the user-rejected error to the model. The
136+
// CLI emits a kind-specific error message ("The user rejected this tool call.")
137+
// for the reject decision, which lets us assert the decision was honored
138+
// — not merely that the operation didn't happen.
139+
var mu sync.Mutex
140+
userRejectedToolCall := false
141+
142+
session.On(func(event copilot.SessionEvent) {
143+
if d, ok := event.Data.(*copilot.ToolExecutionCompleteData); ok &&
144+
!d.Success &&
145+
d.Error != nil &&
146+
strings.Contains(strings.ToLower(d.Error.Message), "user rejected") {
147+
mu.Lock()
148+
userRejectedToolCall = true
149+
mu.Unlock()
150+
}
151+
})
152+
133153
testFile := filepath.Join(ctx.WorkDir, "protected.txt")
134154
originalContent := []byte("protected content")
135155
err = os.WriteFile(testFile, originalContent, 0644)
@@ -149,6 +169,12 @@ func TestPermissionsE2E(t *testing.T) {
149169
t.Fatalf("Failed to get final message: %v", err)
150170
}
151171

172+
mu.Lock()
173+
if !userRejectedToolCall {
174+
t.Error("Expected a tool.execution_complete event whose error indicates the user rejected the call.")
175+
}
176+
mu.Unlock()
177+
152178
// Verify the file was NOT modified
153179
content, err := os.ReadFile(testFile)
154180
if err != nil {

nodejs/test/e2e/permissions.e2e.test.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,23 @@ describe("Permission callbacks", async () => {
5555
},
5656
});
5757

58+
// Regression check for https://github.com/github/copilot-sdk/issues/1194:
59+
// the reject decision must round-trip through the CLI with its discriminator
60+
// intact so the agent surfaces the user-rejected error to the model. The
61+
// CLI emits a kind-specific error message ("The user rejected this tool call.")
62+
// for the reject decision, which lets us assert the decision was honored
63+
// — not merely that the operation didn't happen.
64+
let userRejectedToolCall = false;
65+
session.on((event) => {
66+
if (
67+
event.type === "tool.execution_complete" &&
68+
!event.data.success &&
69+
event.data.error?.message.toLowerCase().includes("user rejected")
70+
) {
71+
userRejectedToolCall = true;
72+
}
73+
});
74+
5875
const originalContent = "protected content";
5976
const testFile = join(workDir, "protected.txt");
6077
await writeFile(testFile, originalContent);
@@ -63,6 +80,8 @@ describe("Permission callbacks", async () => {
6380
prompt: "Edit protected.txt and replace 'protected' with 'hacked'.",
6481
});
6582

83+
expect(userRejectedToolCall).toBe(true);
84+
6685
// Verify the file was NOT modified
6786
const content = await readFile(testFile, "utf-8");
6887
expect(content).toBe(originalContent);

python/e2e/test_permissions_e2e.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,35 @@ def on_permission_request(
5656

5757
session = await ctx.client.create_session(on_permission_request=on_permission_request)
5858

59+
# Regression check for https://github.com/github/copilot-sdk/issues/1194:
60+
# the reject decision must round-trip through the CLI with its discriminator
61+
# intact so the agent surfaces the user-rejected error to the model. The
62+
# CLI emits a kind-specific error message ("The user rejected this tool call.")
63+
# for the reject decision, which lets us assert the decision was honored
64+
# — not merely that the operation didn't happen.
65+
user_rejected_events = []
66+
67+
def on_event(event):
68+
match event.data:
69+
case ToolExecutionCompleteData(success=False) as data:
70+
error = data.error
71+
msg = (
72+
error
73+
if isinstance(error, str)
74+
else (getattr(error, "message", None) if error is not None else None)
75+
)
76+
if msg and "user rejected" in msg.lower():
77+
user_rejected_events.append(event)
78+
79+
session.on(on_event)
80+
5981
original_content = "protected content"
6082
write_file(ctx.work_dir, "protected.txt", original_content)
6183

6284
await session.send_and_wait("Edit protected.txt and replace 'protected' with 'hacked'.")
6385

86+
assert len(user_rejected_events) > 0
87+
6488
# Verify the file was NOT modified
6589
content = read_file(ctx.work_dir, "protected.txt")
6690
assert content == original_content

rust/tests/e2e/permissions.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,25 @@ async fn should_deny_permission_when_handler_returns_denied() {
8383
.await
8484
.expect("create session");
8585

86+
// Regression check for https://github.com/github/copilot-sdk/issues/1194:
87+
// the reject decision must round-trip through the CLI with its
88+
// discriminator intact so the agent surfaces the user-rejected error
89+
// to the model. The CLI emits a kind-specific error message
90+
// ("The user rejected this tool call.") for the reject decision,
91+
// which lets us assert the decision was honored — not merely that
92+
// the operation didn't happen.
93+
let events = session.subscribe();
94+
8695
session
8796
.send_and_wait("Edit protected.txt and replace 'protected' with 'hacked'.")
8897
.await
8998
.expect("send");
9099

100+
wait_for_event(events, "user-rejected tool completion", |event| {
101+
is_user_rejected_tool_completion(event)
102+
})
103+
.await;
104+
91105
let content = std::fs::read_to_string(&test_file).expect("read protected file");
92106
assert_eq!(content, "protected content");
93107

@@ -546,6 +560,21 @@ fn is_permission_denied_tool_completion(event: &github_copilot_sdk::SessionEvent
546560
.unwrap_or(false)
547561
}
548562

563+
fn is_user_rejected_tool_completion(event: &github_copilot_sdk::SessionEvent) -> bool {
564+
if event.parsed_type() != SessionEventType::ToolExecutionComplete {
565+
return false;
566+
}
567+
let data = event
568+
.typed_data::<ToolExecutionCompleteData>()
569+
.expect("tool.execution_complete data");
570+
!data.success
571+
&& data
572+
.error
573+
.as_ref()
574+
.map(|error| error.message.to_lowercase().contains("user rejected"))
575+
.unwrap_or(false)
576+
}
577+
549578
fn permission_request_tool_call_id(request: &PermissionRequestData) -> Option<&str> {
550579
request
551580
.tool_call_id

0 commit comments

Comments
 (0)