@@ -28,7 +28,7 @@ import {
2828} from '@theia/ai-core' ;
2929import { CancellationToken , isArray } from '@theia/core' ;
3030import { Anthropic } from '@anthropic-ai/sdk' ;
31- import { Message , MessageParam } from '@anthropic-ai/sdk/resources' ;
31+ import type { Message , MessageParam } from '@anthropic-ai/sdk/resources' ;
3232
3333export const DEFAULT_MAX_TOKENS = 4096 ;
3434
@@ -41,7 +41,7 @@ interface ToolCallback {
4141
4242const createMessageContent = ( message : LanguageModelMessage ) : MessageParam [ 'content' ] => {
4343 if ( LanguageModelMessage . isTextMessage ( message ) ) {
44- return message . text ;
44+ return [ { type : 'text' , text : message . text } ] ;
4545 } else if ( LanguageModelMessage . isThinkingMessage ( message ) ) {
4646 return [ { signature : message . signature , thinking : message . thinking , type : 'thinking' } ] ;
4747 } else if ( LanguageModelMessage . isToolUseMessage ( message ) ) {
@@ -52,17 +52,27 @@ const createMessageContent = (message: LanguageModelMessage): MessageParam['cont
5252 throw new Error ( `Unknown message type:'${ JSON . stringify ( message ) } '` ) ;
5353} ;
5454
/** Every content block param except the thinking and redacted-thinking variants. */
type NonThinkingParam = Exclude<
    Anthropic.Messages.ContentBlockParam,
    Anthropic.Messages.ThinkingBlockParam | Anthropic.Messages.RedactedThinkingBlockParam
>;

/**
 * Type guard that narrows a content block param to {@link NonThinkingParam}.
 * @param content The content block to inspect
 * @returns `false` when the block's `type` is `'thinking'` or `'redacted_thinking'`, `true` otherwise
 */
function isNonThinkingParam(content: Anthropic.Messages.ContentBlockParam): content is NonThinkingParam {
    switch (content.type) {
        case 'thinking':
        case 'redacted_thinking':
            return false;
        default:
            return true;
    }
}
61+
5562/**
5663 * Transforms Theia language model messages to Anthropic API format
5764 * @param messages Array of LanguageModelRequestMessage to transform
5865 * @returns Object containing transformed messages and optional system message
5966 */
6067function transformToAnthropicParams (
61- messages : readonly LanguageModelMessage [ ]
62- ) : { messages : MessageParam [ ] ; systemMessage ?: string } {
68+ messages : readonly LanguageModelMessage [ ] ,
69+ addCacheControl : boolean = true
70+ ) : { messages : MessageParam [ ] ; systemMessage ?: Anthropic . Messages . TextBlockParam [ ] } {
6371 // Extract the system message (if any), as it is a separate parameter in the Anthropic API.
6472 const systemMessageObj = messages . find ( message => message . actor === 'system' ) ;
65- const systemMessage = systemMessageObj && LanguageModelMessage . isTextMessage ( systemMessageObj ) && systemMessageObj . text || undefined ;
73+ const systemMessageText = systemMessageObj && LanguageModelMessage . isTextMessage ( systemMessageObj ) && systemMessageObj . text || undefined ;
74+ const systemMessage : Anthropic . Messages . TextBlockParam [ ] | undefined =
75+ systemMessageText ? [ { type : 'text' , text : systemMessageText , cache_control : addCacheControl ? { type : 'ephemeral' } : undefined } ] : undefined ;
6676
6777 const convertedMessages = messages
6878 . filter ( message => message . actor !== 'system' )
@@ -77,6 +87,35 @@ function transformToAnthropicParams(
7787 } ;
7888}
7989
/**
 * If possible adds an ephemeral cache control to the last message in the conversation.
 * This is used to enable incremental caching of the conversation.
 *
 * String content is wrapped into a single text block carrying the cache control. For array
 * content, the cache control is set on the last block that is not a thinking or
 * redacted-thinking block; all other blocks of the message are kept in their original order.
 *
 * @param messages The messages to process
 * @returns A new messages array with the last message adapted to include cache control. If no cache
 * control can be added (no messages, or the last message holds only thinking blocks), the original
 * messages array is returned. In any case, the original messages are not modified.
 */
function addCacheControlToLastMessage(messages: Anthropic.Messages.MessageParam[]): Anthropic.Messages.MessageParam[] {
    const latestMessage = messages[messages.length - 1];
    if (!latestMessage) {
        return messages;
    }
    const precedingMessages = messages.slice(0, -1);

    if (typeof latestMessage.content === 'string') {
        const cachedContent: NonThinkingParam = { type: 'text', text: latestMessage.content, cache_control: { type: 'ephemeral' } };
        return [...precedingMessages, { ...latestMessage, content: [cachedContent] }];
    }

    if (Array.isArray(latestMessage.content)) {
        // Work on a copy so that the caller's content array and its blocks stay untouched.
        const updatedContent = [...latestMessage.content];
        // We can't set cache control on thinking blocks, so search backwards for the last non-thinking block.
        for (let i = updatedContent.length - 1; i >= 0; i--) {
            const block = updatedContent[i];
            if (block && isNonThinkingParam(block)) {
                const cachedBlock: NonThinkingParam = { ...block, cache_control: { type: 'ephemeral' } };
                // Replace only this block; the remaining blocks (e.g. further tool results) must still be sent.
                updatedContent[i] = cachedBlock;
                return [...precedingMessages, { ...latestMessage, content: updatedContent }];
            }
        }
    }
    return messages;
}
118+
80119export const AnthropicModelIdentifier = Symbol ( 'AnthropicModelIdentifier' ) ;
81120
82121/**
@@ -102,6 +141,7 @@ export class AnthropicModel implements LanguageModel {
102141 public readonly id : string ,
103142 public model : string ,
104143 public enableStreaming : boolean ,
144+ public useCaching : boolean ,
105145 public apiKey : ( ) => string | undefined ,
106146 public maxTokens : number = DEFAULT_MAX_TOKENS ,
107147 protected readonly tokenUsageService ?: TokenUsageService
@@ -153,11 +193,18 @@ export class AnthropicModel implements LanguageModel {
153193 toolMessages ?: readonly Anthropic . Messages . MessageParam [ ]
154194 ) : Promise < LanguageModelStreamResponse > {
155195 const settings = this . getSettings ( request ) ;
156- const { messages, systemMessage } = transformToAnthropicParams ( request . messages ) ;
196+ const { messages, systemMessage } = transformToAnthropicParams ( request . messages , this . useCaching ) ;
197+
198+ let anthropicMessages = [ ...messages , ...( toolMessages ?? [ ] ) ] ;
199+
200+ if ( this . useCaching && anthropicMessages . length ) {
201+ anthropicMessages = addCacheControlToLastMessage ( anthropicMessages ) ;
202+ }
203+
157204 const tools = this . createTools ( request ) ;
158205 const params : Anthropic . MessageCreateParams = {
159206 max_tokens : this . maxTokens ,
160- messages : [ ... messages , ... ( toolMessages ?? [ ] ) ] ,
207+ messages : anthropicMessages ,
161208 tools,
162209 tool_choice : tools ? { type : 'auto' } : undefined ,
163210 model : this . model ,
@@ -231,6 +278,8 @@ export class AnthropicModel implements LanguageModel {
231278 const tokenUsageParams : TokenUsageParams = {
232279 inputTokens : currentMessage . usage . input_tokens ,
233280 outputTokens : currentMessage . usage . output_tokens ,
281+ cachedInputTokens : currentMessage . usage . cache_creation_input_tokens || undefined ,
282+ readCachedInputTokens : currentMessage . usage . cache_read_input_tokens || undefined ,
234283 requestId : request . requestId
235284 } ;
236285 await that . tokenUsageService . recordTokenUsage ( that . id , tokenUsageParams ) ;
@@ -285,15 +334,21 @@ export class AnthropicModel implements LanguageModel {
285334 return { stream : asyncIterator } ;
286335 }
287336
/**
 * Converts the tools of the given request into Anthropic tool definitions.
 * When `useCaching` is set, the last tool definition is marked with an ephemeral cache control.
 * @param request The language model request whose tools are converted
 * @returns The converted tools, or `undefined` if the request has no tools or an empty tool list
 */
protected createTools(request: LanguageModelRequest): Anthropic.Messages.Tool[] | undefined {
    const requestTools = request.tools;
    if (requestTools?.length === 0) {
        return undefined;
    }
    const anthropicTools = requestTools?.map(({ name, description, parameters }) => ({
        name,
        description,
        input_schema: parameters
    } as Anthropic.Messages.Tool));
    // Only the final tool definition receives the cache control marker.
    const lastTool = this.useCaching && anthropicTools?.length
        ? anthropicTools[anthropicTools.length - 1]
        : undefined;
    if (lastTool) {
        lastTool.cache_control = { type: 'ephemeral' };
    }
    return anthropicTools;
}
298353
299354 protected async handleNonStreamingRequest (
0 commit comments