Skip to content

Commit 04e7a55

Browse files
murugand23 and mdrxy authored
fix(fireworks): translate canonical multimodal content blocks for chat completions (#37090)
## Summary

`langchain_fireworks._convert_message_to_dict` ships LangChain canonical v0/v1 multimodal content blocks (e.g. `{"type": "image", "base64": ..., "mime_type": ...}`) on the wire unchanged. Fireworks' OpenAI-compatible chat completions API rejects the unknown `base64`/`mime_type` keys and the list shape on roles that expect a string, returning HTTP 422 — so any image upload, including via tools that return image content blocks, fails for Kimi K2.6 and other Fireworks vision models.

This change mirrors `langchain_openai.chat_models.base._format_message_content`:

- Walk `content` blocks.
- Drop block types the chat-completions wire doesn't carry (`tool_use`, `thinking`, `reasoning_content`, `function_call`, `code_interpreter_call`).
- Detect v0/v1 multimodal data blocks via `langchain_core.messages.is_data_content_block`, and translate them via `convert_to_openai_data_block(..., api="chat/completions")`.
- Convert legacy Anthropic-shape image blocks (`{"type": "image", "source": {...}}`) to OpenAI `image_url` blocks.
- Strings and non-list content pass through unchanged.

Applied in the `ChatMessage`, `HumanMessage`, `SystemMessage`, and `ToolMessage` paths of `_convert_message_to_dict`. `AIMessage` already routes through `_convert_from_v1_to_chat_completions` for v1 output and assistant content is normally text-only on the way out; in the diff below its content is nevertheless passed through the same helper after that conversion.

## Why this approach

Fireworks is OpenAI-compatible. The canonical → OpenAI translator already exists in `langchain_core.messages.block_translators.openai` and is the same one `langchain-openai` uses. Reusing it (rather than inventing a Fireworks-specific translator) gives:

- v0 (`source_type`-based) and v1 (`base64`/`url`-based) data block coverage for free.
- Consistent behavior with `langchain-openai` for image, file, and any future canonical data block.
- A small, focused diff (≈30 lines of new code, plus tests).

## Test plan

- [x] `make test` passes (64/64 unit tests, including 9 new ones for the new helper and translation paths).
- [x] `make lint` passes (ruff check, ruff format, mypy, lint_imports).
- [ ] End-to-end: image upload to a Kimi K2.6 (Fireworks) agent translates to `{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}` on the wire and the model returns a coherent description (validated locally against `langchain-fireworks==1.0.0` site-packages with the same patch).

---------

Co-authored-by: murugand23 <murugand23@users.noreply.github.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
1 parent a1f336f commit 04e7a55

2 files changed

Lines changed: 407 additions & 6 deletions

File tree

libs/partners/fireworks/langchain_fireworks/chat_models.py

Lines changed: 85 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,10 @@
5757
ToolCall,
5858
ToolMessage,
5959
ToolMessageChunk,
60+
is_data_content_block,
61+
)
62+
from langchain_core.messages.block_translators.openai import (
63+
convert_to_openai_data_block,
6064
)
6165
from langchain_core.messages.tool import (
6266
ToolCallChunk,
@@ -166,6 +170,70 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
166170
return ChatMessage(content=_dict.get("content", ""), role=role or "")
167171

168172

173+
def _format_message_content(content: Any) -> Any:
174+
"""Format message content for the Fireworks chat completions wire format.
175+
176+
Adapted from `langchain_openai.chat_models.base._format_message_content`,
177+
scoped to the chat completions API: drops content block types the wire
178+
format does not carry, translates canonical v0/v1 multimodal data blocks
179+
via `convert_to_openai_data_block(block, api="chat/completions")`, and
180+
converts legacy Anthropic-shape image blocks (`{"type": "image",
181+
"source": {...}}`) to OpenAI `image_url` blocks. String and non-list
182+
content are returned unchanged.
183+
184+
Args:
185+
content: The message content. Strings and non-list values are
186+
returned as-is; lists are walked block by block.
187+
188+
Returns:
189+
The formatted content, ready to be placed on the chat completions
190+
wire. List inputs return a new list with translations applied; other
191+
inputs are returned unchanged.
192+
"""
193+
if not isinstance(content, list):
194+
return content
195+
formatted: list[Any] = []
196+
for block in content:
197+
if isinstance(block, dict) and "type" in block:
198+
btype = block["type"]
199+
if btype in (
200+
"tool_use",
201+
"thinking",
202+
"reasoning_content",
203+
"function_call",
204+
"code_interpreter_call",
205+
):
206+
continue
207+
if is_data_content_block(block):
208+
formatted.append(
209+
convert_to_openai_data_block(block, api="chat/completions")
210+
)
211+
continue
212+
if (
213+
btype == "image"
214+
and (source := block.get("source"))
215+
and isinstance(source, dict)
216+
):
217+
if (
218+
source.get("type") == "base64"
219+
and (media_type := source.get("media_type"))
220+
and (data := source.get("data"))
221+
):
222+
formatted.append(
223+
{
224+
"type": "image_url",
225+
"image_url": {"url": f"data:{media_type};base64,{data}"},
226+
}
227+
)
228+
continue
229+
if source.get("type") == "url" and (url := source.get("url")):
230+
formatted.append({"type": "image_url", "image_url": {"url": url}})
231+
continue
232+
continue
233+
formatted.append(block)
234+
return formatted
235+
236+
169237
def _convert_message_to_dict(message: BaseMessage) -> dict:
170238
"""Convert a LangChain message to a dictionary.
171239
@@ -178,14 +246,23 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
178246
"""
179247
message_dict: dict[str, Any]
180248
if isinstance(message, ChatMessage):
181-
message_dict = {"role": message.role, "content": message.content}
249+
message_dict = {
250+
"role": message.role,
251+
"content": _format_message_content(message.content),
252+
}
182253
elif isinstance(message, HumanMessage):
183-
message_dict = {"role": "user", "content": message.content}
254+
message_dict = {
255+
"role": "user",
256+
"content": _format_message_content(message.content),
257+
}
184258
elif isinstance(message, AIMessage):
185259
# Translate v1 content
186260
if message.response_metadata.get("output_version") == "v1":
187261
message = _convert_from_v1_to_chat_completions(message)
188-
message_dict = {"role": "assistant", "content": message.content}
262+
message_dict = {
263+
"role": "assistant",
264+
"content": _format_message_content(message.content),
265+
}
189266
if "function_call" in message.additional_kwargs:
190267
message_dict["function_call"] = message.additional_kwargs["function_call"]
191268
# If function call only, content is None not empty string
@@ -206,7 +283,10 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
206283
else:
207284
pass
208285
elif isinstance(message, SystemMessage):
209-
message_dict = {"role": "system", "content": message.content}
286+
message_dict = {
287+
"role": "system",
288+
"content": _format_message_content(message.content),
289+
}
210290
elif isinstance(message, FunctionMessage):
211291
message_dict = {
212292
"role": "function",
@@ -216,7 +296,7 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
216296
elif isinstance(message, ToolMessage):
217297
message_dict = {
218298
"role": "tool",
219-
"content": message.content,
299+
"content": _format_message_content(message.content),
220300
"tool_call_id": message.tool_call_id,
221301
}
222302
else:

0 commit comments

Comments
 (0)