Skip to content

Commit a73b4c0

Browse files
committed
Fix image response rendering in event logs
1 parent ae57c4d commit a73b4c0

1 file changed

Lines changed: 112 additions & 20 deletions

File tree

web/src/lib/protocol.ts

Lines changed: 112 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,26 @@ function dataToImageURL(mediaType: string, data: string): string | null {
326326
return isBase64Payload(payload) ? `data:${normalized};base64,${payload}` : null;
327327
}
328328

329+
/**
 * Renders a tracked image generation item as a Markdown image.
 *
 * The final `result` payload is preferred; when it is absent the most recent
 * `partial` payload is used instead and the alt text says so.
 *
 * @param image - Image generation state; only `result`, `partial`, `mimeType`
 *   and `outputFormat` are read here.
 * @returns A `![alt](url)` Markdown string, or `null` when there is no payload
 *   or `dataToImageURL` does not produce a URL for it.
 */
function imageGenerationMarkdown(image: OpenAIImageGeneration): string | null {
  // `||` (not `??`) so an empty-string result still falls through to the partial.
  const data = image.result || image.partial;
  if (!data) return null;

  // MIME type precedence: explicit value, then one derived from the output
  // format, then PNG as the last-resort default.
  const mimeType =
    image.mimeType || imageMimeTypeFromOutputFormat(image.outputFormat) || "image/png";

  const url = dataToImageURL(mimeType, data);
  if (!url) return null;

  const alt = image.result ? "generated image" : "partial generated image";
  return `![${alt}](${url})`;
}
338+
339+
/**
 * Maps an image output format name (for example `"png"`, `".JPG"`) to an
 * `image/*` MIME type.
 *
 * The input is trimmed, lower-cased, and stripped of a single leading dot
 * before matching.
 *
 * @param raw - Format name; may be omitted or empty.
 * @returns The MIME type for `jpg`, `jpeg`, `png`, `gif` or `webp`; `null` for
 *   an empty or unrecognised format so the caller can choose its own fallback.
 */
function imageMimeTypeFromOutputFormat(raw?: string): string | null {
  const format = (raw || "").trim().toLowerCase().replace(/^\./, "");
  switch (format) {
    case "jpg":
      // "jpg" is only a file extension; the MIME subtype is "jpeg".
      return "image/jpeg";
    case "png":
    case "jpeg":
    case "gif":
    case "webp":
      return `image/${format}`;
    default:
      // Covers the empty string as well as unknown formats.
      return null;
  }
}
348+
329349
function isDisplayableImageURL(url: string): boolean {
330350
const trimmed = url.trim();
331351
return isSafeDataImageURL(trimmed) || /^https?:\/\//i.test(trimmed);
@@ -591,6 +611,14 @@ export interface StreamExtraction {
591611
raw?: unknown;
592612
}
593613

614+
/**
 * Accumulated state for one OpenAI Responses `image_generation_call` output
 * item, merged from the stream events and response payloads that mention it.
 */
interface OpenAIImageGeneration {
  /** Arrival sequence number; output items are sorted by it when flushed. */
  order: number;
  /** Final image payload, taken from the item's `result` field. */
  result?: string;
  /** Latest partial image payload, taken from `partial_image_b64`. */
  partial?: string;
  /** MIME type reported on the item as `mime_type` or `media_type`. */
  mimeType?: string;
  /** Value of `output_format`; used to derive a MIME type when `mimeType` is unset. */
  outputFormat?: string;
}
621+
594622
interface AnthropicBlock {
595623
index: number;
596624
type: string;
@@ -616,11 +644,10 @@ export function extractResponseStream(text: string): StreamExtraction {
616644
const blocks = new Map<number, AnthropicBlock>();
617645
const blockOrder: number[] = [];
618646

619-
// OpenAI Responses function_call items are streamed as
620-
// response.output_item.added (with name/call_id) followed by
621-
// response.function_call_arguments.delta chunks. Track them by
622-
// output_index and flush at the end so the user sees the actual tool call
623-
// even when the response contains no output_text.
647+
// OpenAI Responses output items can arrive outside output_text deltas:
648+
// tool calls stream their arguments, while image generation streams base64
649+
// payloads on the item itself. Track them by output_index and flush at the
650+
// end so non-text responses don't render as empty assistant turns.
624651
interface OpenAICall {
625652
name: string;
626653
callID: string;
@@ -629,7 +656,8 @@ export function extractResponseStream(text: string): StreamExtraction {
629656
custom?: boolean;
630657
}
631658
const openAICalls = new Map<number, OpenAICall>();
632-
let openAICallOrder = 0;
659+
const openAIImages = new Map<number, OpenAIImageGeneration>();
660+
let openAIItemOrder = 0;
633661

634662
// CPA logs the SSE stream as-is plus a leading "Status: 200" / header
635663
// block. Walk line by line and only act on `data:` rows.
@@ -724,7 +752,7 @@ export function extractResponseStream(text: string): StreamExtraction {
724752
name: "tool",
725753
callID: "",
726754
args: o.delta,
727-
order: openAICallOrder++,
755+
order: openAIItemOrder++,
728756
});
729757
}
730758
}
@@ -739,12 +767,26 @@ export function extractResponseStream(text: string): StreamExtraction {
739767
name: "tool",
740768
callID: "",
741769
args: o.delta,
742-
order: openAICallOrder++,
770+
order: openAIItemOrder++,
743771
custom: true,
744772
});
745773
}
746774
}
747775
}
776+
if (
777+
o.type === "response.image_generation_call.partial_image" &&
778+
typeof o.partial_image_b64 === "string"
779+
) {
780+
const idx = typeof o.output_index === "number" ? (o.output_index as number) : -1;
781+
const existing = openAIImages.get(idx);
782+
openAIImages.set(idx, {
783+
order: existing?.order ?? openAIItemOrder++,
784+
result: existing?.result,
785+
partial: o.partial_image_b64,
786+
mimeType: existing?.mimeType,
787+
outputFormat: stringValue(o.output_format) || existing?.outputFormat,
788+
});
789+
}
748790
if (o.type === "response.output_item.added") {
749791
const item = (o.item as Record<string, unknown>) || {};
750792
if (item.type === "function_call") {
@@ -755,7 +797,7 @@ export function extractResponseStream(text: string): StreamExtraction {
755797
name: typeof item.name === "string" ? (item.name as string) : existing?.name || "tool",
756798
callID: typeof item.call_id === "string" ? (item.call_id as string) : existing?.callID || "",
757799
args: (existing?.args || "") + partial,
758-
order: existing?.order ?? openAICallOrder++,
800+
order: existing?.order ?? openAIItemOrder++,
759801
});
760802
} else if (item.type === "custom_tool_call") {
761803
const idx = typeof o.output_index === "number" ? (o.output_index as number) : -1;
@@ -765,9 +807,19 @@ export function extractResponseStream(text: string): StreamExtraction {
765807
name: typeof item.name === "string" ? (item.name as string) : existing?.name || "tool",
766808
callID: typeof item.call_id === "string" ? (item.call_id as string) : existing?.callID || "",
767809
args: (existing?.args || "") + partial,
768-
order: existing?.order ?? openAICallOrder++,
810+
order: existing?.order ?? openAIItemOrder++,
769811
custom: true,
770812
});
813+
} else if (item.type === "image_generation_call") {
814+
const idx = typeof o.output_index === "number" ? (o.output_index as number) : -1;
815+
const existing = openAIImages.get(idx);
816+
openAIImages.set(idx, {
817+
order: existing?.order ?? openAIItemOrder++,
818+
result: stringValue(item.result) || existing?.result,
819+
partial: existing?.partial,
820+
mimeType: stringValue(item.mime_type ?? item.media_type) || existing?.mimeType,
821+
outputFormat: stringValue(item.output_format) || existing?.outputFormat,
822+
});
771823
} else if (item.type === "reasoning" && hasEncryptedReasoning(item)) {
772824
markHidden(out, "reasoning");
773825
}
@@ -786,9 +838,18 @@ export function extractResponseStream(text: string): StreamExtraction {
786838
name: stringValue(item.name) || existing?.name || "tool",
787839
callID: stringValue(item.call_id) || existing?.callID || "",
788840
args: input || existing?.args || "",
789-
order: existing?.order ?? openAICallOrder++,
841+
order: existing?.order ?? openAIItemOrder++,
790842
custom: item.type === "custom_tool_call" || existing?.custom,
791843
});
844+
} else if (itemType === "image_generation_call") {
845+
const existingImage = openAIImages.get(idx);
846+
openAIImages.set(idx, {
847+
order: existingImage?.order ?? openAIItemOrder++,
848+
result: stringValue(item.result) || existingImage?.result,
849+
partial: existingImage?.partial,
850+
mimeType: stringValue(item.mime_type ?? item.media_type) || existingImage?.mimeType,
851+
outputFormat: stringValue(item.output_format) || existingImage?.outputFormat,
852+
});
792853
} else if (itemType === "web_search_call") {
793854
appendResponseMarkdown(out, webSearchCallMarkdown(item));
794855
} else if (itemType && !isKnownResponsesOutputType(itemType)) {
@@ -839,6 +900,23 @@ export function extractResponseStream(text: string): StreamExtraction {
839900
}
840901
}
841902

903+
const finalResponseOutput = (completedResponse ?? latestResponse)?.output;
904+
if (Array.isArray(finalResponseOutput)) {
905+
finalResponseOutput.forEach((item, index) => {
906+
if (!item || typeof item !== "object") return;
907+
const it = item as Record<string, unknown>;
908+
if (it.type !== "image_generation_call") return;
909+
const existing = openAIImages.get(index);
910+
openAIImages.set(index, {
911+
order: existing?.order ?? openAIItemOrder++,
912+
result: stringValue(it.result) || existing?.result,
913+
partial: existing?.partial,
914+
mimeType: stringValue(it.mime_type ?? it.media_type) || existing?.mimeType,
915+
outputFormat: stringValue(it.output_format) || existing?.outputFormat,
916+
});
917+
});
918+
}
919+
842920
// Flush Anthropic blocks in the order they were declared so tool_use calls
843921
// appear inline alongside the assistant's prose.
844922
for (const idx of blockOrder) {
@@ -868,13 +946,23 @@ export function extractResponseStream(text: string): StreamExtraction {
868946
}
869947
}
870948

871-
// Flush OpenAI Responses tool call items in arrival order.
872-
const calls = Array.from(openAICalls.values()).sort((a, b) => a.order - b.order);
873-
for (const c of calls) {
949+
// Flush OpenAI Responses output items in arrival order.
950+
const responseItems: Array<{ order: number; markdown: string }> = [];
951+
for (const image of openAIImages.values()) {
952+
const markdown = imageGenerationMarkdown(image);
953+
if (markdown) responseItems.push({ order: image.order, markdown });
954+
}
955+
for (const c of openAICalls.values()) {
874956
const input = c.custom ? c.args : formatPartialJSON(c.args);
875957
const language = c.custom ? customToolLanguage(c.name, input) : "json";
876-
const sep = out.content ? "\n\n" : "";
877-
out.content += sep + toolUseMarkdown(c.name, c.callID, input, language);
958+
responseItems.push({
959+
order: c.order,
960+
markdown: toolUseMarkdown(c.name, c.callID, input, language),
961+
});
962+
}
963+
responseItems.sort((a, b) => a.order - b.order);
964+
for (const item of responseItems) {
965+
appendResponseMarkdown(out, item.markdown);
878966
}
879967

880968
if (out.detected) {
@@ -1068,10 +1156,14 @@ export function extractResponseJSON(rawJson: string): StreamExtraction | null {
10681156
continue;
10691157
}
10701158
if (it.type === "image_generation_call" && typeof it.result === "string") {
1071-
const mimeType = stringValue(it.mime_type ?? it.media_type) || "image/png";
1072-
const url = dataToImageURL(mimeType, it.result);
1073-
if (url) {
1074-
appendResponseMarkdown(out, `![generated image](${url})`);
1159+
const markdown = imageGenerationMarkdown({
1160+
order: 0,
1161+
result: it.result,
1162+
mimeType: stringValue(it.mime_type ?? it.media_type),
1163+
outputFormat: stringValue(it.output_format),
1164+
});
1165+
if (markdown) {
1166+
appendResponseMarkdown(out, markdown);
10751167
out.detected = true;
10761168
}
10771169
continue;

0 commit comments

Comments
 (0)