Skip to content

Commit 510ecd2

Browse files
Improve image support for tool calls (#15765)
- Introduce ToolCallResult type for better typing of known MCP result types - Add visual Playwright MCP server to AppTester (can use screenshots) - Improve rendering for tool call results
1 parent 176018e commit 510ecd2

File tree

17 files changed

+285
-149
lines changed

17 files changed

+285
-149
lines changed

packages/ai-anthropic/src/node/anthropic-language-model.ts

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@ import {
2626
TokenUsageParams,
2727
UserRequest,
2828
ImageContent,
29+
ToolCallResult,
2930
ImageMimeType
3031
} from '@theia/ai-core';
3132
import { CancellationToken, isArray } from '@theia/core';
3233
import { Anthropic } from '@anthropic-ai/sdk';
33-
import { Message, MessageParam, Base64ImageSource } from '@anthropic-ai/sdk/resources';
34+
import type { Base64ImageSource, ImageBlockParam, Message, MessageParam, TextBlockParam, ToolResultBlockParam } from '@anthropic-ai/sdk/resources';
3435

3536
export const DEFAULT_MAX_TOKENS = 4096;
3637

@@ -193,10 +194,17 @@ export class AnthropicModel implements LanguageModel {
193194
}
194195
}
195196

196-
protected formatToolCallResult(result: unknown): string | Array<{ type: 'text', text: string }> {
197-
if (typeof result === 'object' && result && 'content' in result && Array.isArray(result.content) &&
198-
result.content.every(item => typeof item === 'object' && item && 'type' in item && 'text' in item)) {
199-
return result.content;
197+
protected formatToolCallResult(result: ToolCallResult): ToolResultBlockParam['content'] {
198+
if (typeof result === 'object' && result && 'content' in result && Array.isArray(result.content)) {
199+
return result.content.map<TextBlockParam | ImageBlockParam>(content => {
200+
if (content.type === 'text') {
201+
return { type: 'text', text: content.text };
202+
} else if (content.type === 'image') {
203+
return { type: 'image', source: { type: 'base64', data: content.base64data, media_type: mimeTypeToMediaType(content.mimeType) } };
204+
} else {
205+
return { type: 'text', text: content.data };
206+
}
207+
});
200208
}
201209

202210
if (isArray(result)) {
@@ -321,10 +329,7 @@ export class AnthropicModel implements LanguageModel {
321329

322330
}));
323331

324-
const calls = toolResult.map(tr => {
325-
const resultAsString = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
326-
return { finished: true, id: tr.id, result: resultAsString, function: { name: tr.name, arguments: tr.arguments } };
327-
});
332+
const calls = toolResult.map(tr => ({ finished: true, id: tr.id, result: tr.result, function: { name: tr.name, arguments: tr.arguments } }));
328333
yield { tool_calls: calls };
329334

330335
const toolResponseMessage: Anthropic.Messages.MessageParam = {

packages/ai-chat-ui/src/browser/chat-response-renderer/toolcall-part-renderer.tsx

Lines changed: 86 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,23 @@ import { inject, injectable } from '@theia/core/shared/inversify';
1919
import { ChatResponseContent, ToolCallChatResponseContent } from '@theia/ai-chat/lib/common';
2020
import { ReactNode } from '@theia/core/shared/react';
2121
import { nls } from '@theia/core/lib/common/nls';
22-
import { codicon } from '@theia/core/lib/browser';
22+
import { codicon, OpenerService } from '@theia/core/lib/browser';
2323
import * as React from '@theia/core/shared/react';
2424
import { ToolConfirmation, ToolConfirmationState } from './tool-confirmation';
2525
import { ToolConfirmationManager, ToolConfirmationMode } from '@theia/ai-chat/lib/browser/chat-tool-preferences';
2626
import { ResponseNode } from '../chat-tree-view';
27+
import { useMarkdownRendering } from './markdown-part-renderer';
28+
import { ToolCallResult } from '@theia/ai-core';
2729

2830
@injectable()
2931
export class ToolCallPartRenderer implements ChatResponsePartRenderer<ToolCallChatResponseContent> {
3032

3133
@inject(ToolConfirmationManager)
3234
protected toolConfirmationManager: ToolConfirmationManager;
3335

36+
@inject(OpenerService)
37+
protected openerService: OpenerService;
38+
3439
canHandle(response: ChatResponseContent): number {
3540
if (ToolCallChatResponseContent.is(response)) {
3641
return 10;
@@ -47,7 +52,52 @@ export class ToolCallPartRenderer implements ChatResponsePartRenderer<ToolCallCh
4752
toolConfirmationManager={this.toolConfirmationManager}
4853
chatId={chatId}
4954
renderCollapsibleArguments={this.renderCollapsibleArguments.bind(this)}
50-
tryPrettyPrintJson={this.tryPrettyPrintJson.bind(this)} />;
55+
responseRenderer={this.renderResult.bind(this)} />;
56+
}
57+
58+
protected renderResult(response: ToolCallChatResponseContent): ReactNode {
59+
const result = this.tryParse(response.result);
60+
if (!result) {
61+
return undefined;
62+
}
63+
if (typeof result === 'string') {
64+
return <pre>{JSON.stringify(result, undefined, 2)}</pre>;
65+
}
66+
if ('content' in result) {
67+
return <div className='theia-toolCall-response-content'>
68+
{result.content.map((content, idx) => {
69+
switch (content.type) {
70+
case 'image': {
71+
return <div key={`content-${idx}-${content.type}`} className='theia-toolCall-image-result'>
72+
<img src={`data:${content.mimeType};base64,${content.base64data}`} />
73+
</div>;
74+
}
75+
case 'text': {
76+
return <div key={`content-${idx}-${content.type}`} className='theia-toolCall-text-result'>
77+
<MarkdownRender text={content.text} openerService={this.openerService} />
78+
</div>;
79+
}
80+
case 'audio':
81+
case 'error':
82+
default: {
83+
return <div key={`content-${idx}-${content.type}`} className='theia-toolCall-default-result'><pre>{JSON.stringify(response, undefined, 2)}</pre></div>;
84+
}
85+
}
86+
})}
87+
</div>;
88+
}
89+
return <pre>{JSON.stringify(result, undefined, 2)}</pre>;
90+
}
91+
92+
private tryParse(result: ToolCallResult): ToolCallResult {
93+
if (!result) {
94+
return undefined;
95+
}
96+
try {
97+
return typeof result === 'string' ? JSON.parse(result) : result;
98+
} catch (error) {
99+
return result;
100+
}
51101
}
52102

53103
protected getToolConfirmationSettings(responseId: string, chatId: string): ToolConfirmationMode {
@@ -75,29 +125,6 @@ export class ToolCallPartRenderer implements ChatResponsePartRenderer<ToolCallCh
75125
return args;
76126
}
77127
}
78-
79-
private tryPrettyPrintJson(response: ToolCallChatResponseContent): string | undefined {
80-
let responseContent = response.result;
81-
try {
82-
if (responseContent) {
83-
if (typeof responseContent === 'string') {
84-
responseContent = JSON.parse(responseContent);
85-
}
86-
responseContent = JSON.stringify(responseContent, undefined, 2);
87-
}
88-
} catch (e) {
89-
if (typeof responseContent !== 'string') {
90-
responseContent = nls.localize(
91-
'theia/ai/chat-ui/toolcall-part-renderer/prettyPrintError',
92-
"The content could not be converted to string: '{0}'. This is the original content: '{1}'.",
93-
e.message,
94-
responseContent
95-
);
96-
}
97-
// fall through
98-
}
99-
return responseContent;
100-
}
101128
}
102129

103130
const Spinner = () => (
@@ -110,13 +137,13 @@ interface ToolCallContentProps {
110137
toolConfirmationManager: ToolConfirmationManager;
111138
chatId: string;
112139
renderCollapsibleArguments: (args: string | undefined) => ReactNode;
113-
tryPrettyPrintJson: (response: ToolCallChatResponseContent) => string | undefined;
140+
responseRenderer: (response: ToolCallChatResponseContent) => ReactNode | undefined;
114141
}
115142

116143
/**
117144
* A function component to handle tool call rendering and confirmation
118145
*/
119-
const ToolCallContent: React.FC<ToolCallContentProps> = ({ response, confirmationMode, toolConfirmationManager, chatId, tryPrettyPrintJson, renderCollapsibleArguments }) => {
146+
const ToolCallContent: React.FC<ToolCallContentProps> = ({ response, confirmationMode, toolConfirmationManager, chatId, responseRenderer, renderCollapsibleArguments }) => {
120147
const [confirmationState, setConfirmationState] = React.useState<ToolConfirmationState>('waiting');
121148

122149
React.useEffect(() => {
@@ -163,35 +190,43 @@ const ToolCallContent: React.FC<ToolCallContentProps> = ({ response, confirmatio
163190

164191
return (
165192
<div className='theia-toolCall'>
166-
<h4>
167-
{confirmationState === 'denied' ? (
168-
<span className="theia-tool-denied">
169-
<span className={codicon('error')}></span> {nls.localize('theia/ai/chat-ui/toolcall-part-renderer/denied', 'Execution denied')}: {response.name}
193+
{confirmationState === 'denied' ? (
194+
<span className='theia-toolCall-denied'>
195+
<span className={codicon('error')}></span> {nls.localize('theia/ai/chat-ui/toolcall-part-renderer/denied', 'Execution denied')}: {response.name}
196+
</span>
197+
) : response.finished ? (
198+
<details className='theia-toolCall-finished'>
199+
<summary>
200+
{nls.localize('theia/ai/chat-ui/toolcall-part-renderer/finished', 'Ran')} {response.name}
201+
({renderCollapsibleArguments(response.arguments)})
202+
</summary>
203+
<div className='theia-toolCall-response-result'>
204+
{responseRenderer(response)}
205+
</div>
206+
</details>
207+
) : (
208+
confirmationState === 'allowed' && (
209+
<span className='theia-toolCall-allowed'>
210+
<Spinner /> {nls.localizeByDefault('Running')} {response.name}
170211
</span>
171-
) : response.finished ? (
172-
<details>
173-
<summary>{nls.localize('theia/ai/chat-ui/toolcall-part-renderer/finished', 'Ran')} {response.name}
174-
({renderCollapsibleArguments(response.arguments)})
175-
</summary>
176-
<pre>{tryPrettyPrintJson(response)}</pre>
177-
</details>
178-
) : (
179-
confirmationState === 'allowed' && (
180-
<span>
181-
<Spinner /> {nls.localizeByDefault('Running')} {response.name}
182-
</span>
183-
)
184-
)}
185-
</h4>
212+
)
213+
)}
186214

187215
{/* Show confirmation UI when waiting for allow */}
188216
{confirmationState === 'waiting' && (
189-
<ToolConfirmation
190-
response={response}
191-
onAllow={handleAllow}
192-
onDeny={handleDeny}
193-
/>
217+
<span className='theia-toolCall-waiting'>
218+
<ToolConfirmation
219+
response={response}
220+
onAllow={handleAllow}
221+
onDeny={handleDeny}
222+
/>
223+
</span>
194224
)}
195225
</div>
196226
);
197227
};
228+
229+
const MarkdownRender = ({ text, openerService }: { text: string; openerService: OpenerService }) => {
230+
const ref = useMarkdownRendering(text, openerService);
231+
return <div ref={ref}></div>;
232+
};

packages/ai-chat-ui/src/browser/style/index.css

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
flex: 1;
88
}
99

10-
.chat-input-widget > .ps__rail-x,
11-
.chat-input-widget > .ps__rail-y {
10+
.chat-input-widget>.ps__rail-x,
11+
.chat-input-widget>.ps__rail-y {
1212
display: none !important;
1313
}
1414

@@ -23,7 +23,7 @@
2323
overflow-wrap: break-word;
2424
}
2525

26-
div:last-child > .theia-ChatNode {
26+
div:last-child>.theia-ChatNode {
2727
border: none;
2828
}
2929

@@ -59,6 +59,7 @@ div:last-child > .theia-ChatNode {
5959
}
6060

6161
@keyframes dots {
62+
6263
0%,
6364
20% {
6465
content: "";
@@ -121,7 +122,7 @@ div:last-child > .theia-ChatNode {
121122
padding-inline-start: 1rem;
122123
}
123124

124-
.theia-ChatNode li > p {
125+
.theia-ChatNode li>p {
125126
margin-top: 0;
126127
margin-bottom: 0;
127128
}
@@ -135,7 +136,7 @@ div:last-child > .theia-ChatNode {
135136
font-size: var(--theia-code-font-size);
136137
}
137138

138-
.theia-RequestNode > p div {
139+
.theia-RequestNode>p div {
139140
display: inline;
140141
}
141142

@@ -357,8 +358,7 @@ div:last-child > .theia-ChatNode {
357358
text-align: center;
358359
}
359360

360-
.theia-ChatInput-ChangeSet-List
361-
.theia-ChatInput-ChangeSet-Icon.codicon::before {
361+
.theia-ChatInput-ChangeSet-List .theia-ChatInput-ChangeSet-Icon.codicon::before {
362362
font-size: var(--theia-ui-font-size1);
363363
}
364364

@@ -375,8 +375,7 @@ div:last-child > .theia-ChatNode {
375375
color: var(--theia-disabledForeground);
376376
}
377377

378-
.theia-ChatInput-ChangeSet-List
379-
.theia-ChatInput-ChangeSet-AdditionalInfo-SuffixIcon {
378+
.theia-ChatInput-ChangeSet-List .theia-ChatInput-ChangeSet-AdditionalInfo-SuffixIcon {
380379
font-size: var(--theia-ui-font-size0) px;
381380
margin-left: 4px;
382381
}
@@ -619,8 +618,7 @@ div:last-child > .theia-ChatNode {
619618
display: flex;
620619
flex-direction: column;
621620
gap: 8px;
622-
border: var(--theia-border-width) solid
623-
var(--theia-sideBarSectionHeader-border);
621+
border: var(--theia-border-width) solid var(--theia-sideBarSectionHeader-border);
624622
padding: 8px 12px 12px;
625623
border-radius: 5px;
626624
margin: 0 0 8px 0;
@@ -821,14 +819,29 @@ div:last-child > .theia-ChatNode {
821819
color: var(--theia-errorForeground);
822820
}
823821

824-
.theia-tool-pending {
822+
.theia-toolCall-denied,
823+
.theia-toolCall-finished summary,
824+
.theia-toolCall-allowed,
825+
.theia-toolCall-waiting {
826+
font-weight: bold;
827+
}
828+
829+
.theia-toolCall-allowed .codicon-loading {
830+
font-size: 1em;
831+
}
832+
833+
.theia-toolCall-pending {
825834
color: var(--theia-descriptionForeground);
826835
}
827836

828-
.theia-tool-denied {
837+
.theia-toolCall-denied {
829838
color: var(--theia-errorForeground);
830839
}
831840

841+
.theia-toolCall-response-result {
842+
font-weight: normal;
843+
}
844+
832845
.theia-toolCall .fa,
833846
.theia-toolCall details summary::marker,
834847
.theia-thinking .fa,

packages/ai-chat/src/common/chat-model.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import {
2525
ResolvedAIContextVariable,
2626
TextMessage,
2727
ThinkingMessage,
28+
ToolCallResult,
2829
ToolResultMessage,
2930
ToolUseMessage
3031
} from '@theia/ai-core';
@@ -389,7 +390,7 @@ export interface ToolCallChatResponseContent extends Required<ChatResponseConten
389390
name?: string;
390391
arguments?: string;
391392
finished: boolean;
392-
result?: string;
393+
result?: ToolCallResult;
393394
confirmed: Promise<boolean>;
394395
confirm(): void;
395396
deny(): void;
@@ -1497,12 +1498,12 @@ export class ToolCallChatResponseContentImpl implements ToolCallChatResponseCont
14971498
protected _name?: string;
14981499
protected _arguments?: string;
14991500
protected _finished?: boolean;
1500-
protected _result?: string;
1501+
protected _result?: ToolCallResult;
15011502
protected _confirmed: Promise<boolean>;
15021503
protected _confirmationResolver?: (value: boolean) => void;
15031504
protected _confirmationRejecter?: (reason?: unknown) => void;
15041505

1505-
constructor(id?: string, name?: string, arg_string?: string, finished?: boolean, result?: string) {
1506+
constructor(id?: string, name?: string, arg_string?: string, finished?: boolean, result?: ToolCallResult) {
15061507
this._id = id;
15071508
this._name = name;
15081509
this._arguments = arg_string;
@@ -1527,7 +1528,7 @@ export class ToolCallChatResponseContentImpl implements ToolCallChatResponseCont
15271528
get finished(): boolean {
15281529
return this._finished === undefined ? false : this._finished;
15291530
}
1530-
get result(): string | undefined {
1531+
get result(): ToolCallResult | undefined {
15311532
return this._result;
15321533
}
15331534

0 commit comments

Comments
 (0)