@@ -326,6 +326,26 @@ function dataToImageURL(mediaType: string, data: string): string | null {
326326 return isBase64Payload ( payload ) ? `data:${ normalized } ;base64,${ payload } ` : null ;
327327}
328328
/**
 * Renders an image-generation output item as a Markdown image.
 *
 * The final `result` payload is preferred; when it is absent the latest
 * `partial` payload is used instead and the alt text says so.
 *
 * The MIME type is resolved in order from the item's explicit `mimeType`,
 * then from its `outputFormat`, and finally defaults to "image/png".
 *
 * @param image - Accumulated state for one image-generation output item.
 * @returns A Markdown image (`![alt](data-url)`), or `null` when the item has
 *   no payload or `dataToImageURL` rejects the MIME type / payload.
 */
function imageGenerationMarkdown(image: OpenAIImageGeneration): string | null {
  const data = image.result || image.partial;
  if (!data) return null;

  const mimeType =
    image.mimeType || imageMimeTypeFromOutputFormat(image.outputFormat) || "image/png";
  const url = dataToImageURL(mimeType, data);
  if (!url) return null;

  const alt = image.result ? "generated image" : "partial generated image";
  // Callers treat a falsy return as "nothing to render", so the Markdown must
  // actually embed the alt text and URL rather than collapse to an empty string.
  return `![${alt}](${url})`;
}
338+
/**
 * Maps an image output-format token to its MIME type.
 *
 * The token is trimmed, lower-cased, and stripped of a single leading dot
 * before matching, so "png", " PNG " and ".png" are all equivalent.
 * "jpg" is treated as an alias of "jpeg".
 *
 * @param raw - Format token such as "png", "jpeg", "jpg", "gif" or "webp".
 * @returns The `image/*` MIME type, or `null` when the token is missing,
 *   empty, or not one of the recognised formats.
 */
function imageMimeTypeFromOutputFormat(raw?: string): string | null {
  const format = (raw ?? "").trim().toLowerCase().replace(/^\./, "");
  switch (format) {
    case "jpg":
      // "jpg" is only a file-extension spelling; the MIME subtype is "jpeg".
      return "image/jpeg";
    case "png":
    case "jpeg":
    case "gif":
    case "webp":
      return `image/${format}`;
    default:
      // Covers the empty string as well as unrecognised formats.
      return null;
  }
}
348+
329349function isDisplayableImageURL ( url : string ) : boolean {
330350 const trimmed = url . trim ( ) ;
331351 return isSafeDataImageURL ( trimmed ) || / ^ h t t p s ? : \/ \/ / i. test ( trimmed ) ;
@@ -591,6 +611,14 @@ export interface StreamExtraction {
591611 raw ?: unknown ;
592612}
593613
/**
 * Accumulated state for one OpenAI Responses `image_generation_call` item,
 * merged from stream events and from the final response output.
 */
interface OpenAIImageGeneration {
  /** Arrival sequence number; output items are sorted by it when flushed. */
  order: number;
  /** Final image payload taken from the item's `result` field, when present. */
  result?: string;
  /** Most recent partial-image payload (`partial_image_b64`), when present. */
  partial?: string;
  /** Explicit MIME type reported on the item (`mime_type` / `media_type`). */
  mimeType?: string;
  /** Raw `output_format` token, used to derive a MIME type when none is given. */
  outputFormat?: string;
}
621+
594622interface AnthropicBlock {
595623 index : number ;
596624 type : string ;
@@ -616,11 +644,10 @@ export function extractResponseStream(text: string): StreamExtraction {
616644 const blocks = new Map < number , AnthropicBlock > ( ) ;
617645 const blockOrder : number [ ] = [ ] ;
618646
619- // OpenAI Responses function_call items are streamed as
620- // response.output_item.added (with name/call_id) followed by
621- // response.function_call_arguments.delta chunks. Track them by
622- // output_index and flush at the end so the user sees the actual tool call
623- // even when the response contains no output_text.
647+ // OpenAI Responses output items can arrive outside output_text deltas:
648+ // tool calls stream their arguments, while image generation streams base64
649+ // payloads on the item itself. Track them by output_index and flush at the
650+ // end so non-text responses don't render as empty assistant turns.
624651 interface OpenAICall {
625652 name : string ;
626653 callID : string ;
@@ -629,7 +656,8 @@ export function extractResponseStream(text: string): StreamExtraction {
629656 custom ?: boolean ;
630657 }
631658 const openAICalls = new Map < number , OpenAICall > ( ) ;
632- let openAICallOrder = 0 ;
659+ const openAIImages = new Map < number , OpenAIImageGeneration > ( ) ;
660+ let openAIItemOrder = 0 ;
633661
634662 // CPA logs the SSE stream as-is plus a leading "Status: 200" / header
635663 // block. Walk line by line and only act on `data:` rows.
@@ -724,7 +752,7 @@ export function extractResponseStream(text: string): StreamExtraction {
724752 name : "tool" ,
725753 callID : "" ,
726754 args : o . delta ,
727- order : openAICallOrder ++ ,
755+ order : openAIItemOrder ++ ,
728756 } ) ;
729757 }
730758 }
@@ -739,12 +767,26 @@ export function extractResponseStream(text: string): StreamExtraction {
739767 name : "tool" ,
740768 callID : "" ,
741769 args : o . delta ,
742- order : openAICallOrder ++ ,
770+ order : openAIItemOrder ++ ,
743771 custom : true ,
744772 } ) ;
745773 }
746774 }
747775 }
776+ if (
777+ o . type === "response.image_generation_call.partial_image" &&
778+ typeof o . partial_image_b64 === "string"
779+ ) {
780+ const idx = typeof o . output_index === "number" ? ( o . output_index as number ) : - 1 ;
781+ const existing = openAIImages . get ( idx ) ;
782+ openAIImages . set ( idx , {
783+ order : existing ?. order ?? openAIItemOrder ++ ,
784+ result : existing ?. result ,
785+ partial : o . partial_image_b64 ,
786+ mimeType : existing ?. mimeType ,
787+ outputFormat : stringValue ( o . output_format ) || existing ?. outputFormat ,
788+ } ) ;
789+ }
748790 if ( o . type === "response.output_item.added" ) {
749791 const item = ( o . item as Record < string , unknown > ) || { } ;
750792 if ( item . type === "function_call" ) {
@@ -755,7 +797,7 @@ export function extractResponseStream(text: string): StreamExtraction {
755797 name : typeof item . name === "string" ? ( item . name as string ) : existing ?. name || "tool" ,
756798 callID : typeof item . call_id === "string" ? ( item . call_id as string ) : existing ?. callID || "" ,
757799 args : ( existing ?. args || "" ) + partial ,
758- order : existing ?. order ?? openAICallOrder ++ ,
800+ order : existing ?. order ?? openAIItemOrder ++ ,
759801 } ) ;
760802 } else if ( item . type === "custom_tool_call" ) {
761803 const idx = typeof o . output_index === "number" ? ( o . output_index as number ) : - 1 ;
@@ -765,9 +807,19 @@ export function extractResponseStream(text: string): StreamExtraction {
765807 name : typeof item . name === "string" ? ( item . name as string ) : existing ?. name || "tool" ,
766808 callID : typeof item . call_id === "string" ? ( item . call_id as string ) : existing ?. callID || "" ,
767809 args : ( existing ?. args || "" ) + partial ,
768- order : existing ?. order ?? openAICallOrder ++ ,
810+ order : existing ?. order ?? openAIItemOrder ++ ,
769811 custom : true ,
770812 } ) ;
813+ } else if ( item . type === "image_generation_call" ) {
814+ const idx = typeof o . output_index === "number" ? ( o . output_index as number ) : - 1 ;
815+ const existing = openAIImages . get ( idx ) ;
816+ openAIImages . set ( idx , {
817+ order : existing ?. order ?? openAIItemOrder ++ ,
818+ result : stringValue ( item . result ) || existing ?. result ,
819+ partial : existing ?. partial ,
820+ mimeType : stringValue ( item . mime_type ?? item . media_type ) || existing ?. mimeType ,
821+ outputFormat : stringValue ( item . output_format ) || existing ?. outputFormat ,
822+ } ) ;
771823 } else if ( item . type === "reasoning" && hasEncryptedReasoning ( item ) ) {
772824 markHidden ( out , "reasoning" ) ;
773825 }
@@ -786,9 +838,18 @@ export function extractResponseStream(text: string): StreamExtraction {
786838 name : stringValue ( item . name ) || existing ?. name || "tool" ,
787839 callID : stringValue ( item . call_id ) || existing ?. callID || "" ,
788840 args : input || existing ?. args || "" ,
789- order : existing ?. order ?? openAICallOrder ++ ,
841+ order : existing ?. order ?? openAIItemOrder ++ ,
790842 custom : item . type === "custom_tool_call" || existing ?. custom ,
791843 } ) ;
844+ } else if ( itemType === "image_generation_call" ) {
845+ const existingImage = openAIImages . get ( idx ) ;
846+ openAIImages . set ( idx , {
847+ order : existingImage ?. order ?? openAIItemOrder ++ ,
848+ result : stringValue ( item . result ) || existingImage ?. result ,
849+ partial : existingImage ?. partial ,
850+ mimeType : stringValue ( item . mime_type ?? item . media_type ) || existingImage ?. mimeType ,
851+ outputFormat : stringValue ( item . output_format ) || existingImage ?. outputFormat ,
852+ } ) ;
792853 } else if ( itemType === "web_search_call" ) {
793854 appendResponseMarkdown ( out , webSearchCallMarkdown ( item ) ) ;
794855 } else if ( itemType && ! isKnownResponsesOutputType ( itemType ) ) {
@@ -839,6 +900,23 @@ export function extractResponseStream(text: string): StreamExtraction {
839900 }
840901 }
841902
903+ const finalResponseOutput = ( completedResponse ?? latestResponse ) ?. output ;
904+ if ( Array . isArray ( finalResponseOutput ) ) {
905+ finalResponseOutput . forEach ( ( item , index ) => {
906+ if ( ! item || typeof item !== "object" ) return ;
907+ const it = item as Record < string , unknown > ;
908+ if ( it . type !== "image_generation_call" ) return ;
909+ const existing = openAIImages . get ( index ) ;
910+ openAIImages . set ( index , {
911+ order : existing ?. order ?? openAIItemOrder ++ ,
912+ result : stringValue ( it . result ) || existing ?. result ,
913+ partial : existing ?. partial ,
914+ mimeType : stringValue ( it . mime_type ?? it . media_type ) || existing ?. mimeType ,
915+ outputFormat : stringValue ( it . output_format ) || existing ?. outputFormat ,
916+ } ) ;
917+ } ) ;
918+ }
919+
842920 // Flush Anthropic blocks in the order they were declared so tool_use calls
843921 // appear inline alongside the assistant's prose.
844922 for ( const idx of blockOrder ) {
@@ -868,13 +946,23 @@ export function extractResponseStream(text: string): StreamExtraction {
868946 }
869947 }
870948
871- // Flush OpenAI Responses tool call items in arrival order.
872- const calls = Array . from ( openAICalls . values ( ) ) . sort ( ( a , b ) => a . order - b . order ) ;
873- for ( const c of calls ) {
949+ // Flush OpenAI Responses output items in arrival order.
950+ const responseItems : Array < { order : number ; markdown : string } > = [ ] ;
951+ for ( const image of openAIImages . values ( ) ) {
952+ const markdown = imageGenerationMarkdown ( image ) ;
953+ if ( markdown ) responseItems . push ( { order : image . order , markdown } ) ;
954+ }
955+ for ( const c of openAICalls . values ( ) ) {
874956 const input = c . custom ? c . args : formatPartialJSON ( c . args ) ;
875957 const language = c . custom ? customToolLanguage ( c . name , input ) : "json" ;
876- const sep = out . content ? "\n\n" : "" ;
877- out . content += sep + toolUseMarkdown ( c . name , c . callID , input , language ) ;
958+ responseItems . push ( {
959+ order : c . order ,
960+ markdown : toolUseMarkdown ( c . name , c . callID , input , language ) ,
961+ } ) ;
962+ }
963+ responseItems . sort ( ( a , b ) => a . order - b . order ) ;
964+ for ( const item of responseItems ) {
965+ appendResponseMarkdown ( out , item . markdown ) ;
878966 }
879967
880968 if ( out . detected ) {
@@ -1068,10 +1156,14 @@ export function extractResponseJSON(rawJson: string): StreamExtraction | null {
10681156 continue ;
10691157 }
10701158 if ( it . type === "image_generation_call" && typeof it . result === "string" ) {
1071- const mimeType = stringValue ( it . mime_type ?? it . media_type ) || "image/png" ;
1072- const url = dataToImageURL ( mimeType , it . result ) ;
1073- if ( url ) {
1074- appendResponseMarkdown ( out , `` ) ;
1159+ const markdown = imageGenerationMarkdown ( {
1160+ order : 0 ,
1161+ result : it . result ,
1162+ mimeType : stringValue ( it . mime_type ?? it . media_type ) ,
1163+ outputFormat : stringValue ( it . output_format ) ,
1164+ } ) ;
1165+ if ( markdown ) {
1166+ appendResponseMarkdown ( out , markdown ) ;
10751167 out . detected = true ;
10761168 }
10771169 continue ;
0 commit comments