Skip to content

Commit 33e90dd

Browse files
salmanap (Salman Paracha)
and a co-author authored
fixed mixed inputs from openai v1/responses api (#632)
* Fixed mixed inputs from the OpenAI v1/responses API
* Removed tracing from the model-alias-routing demo config
* Handled additional input types from the OpenAI Responses API
---------
Co-authored-by: Salman Paracha <[email protected]>
1 parent a79f55f commit 33e90dd

File tree

4 files changed

+135
-51
lines changed

4 files changed

+135
-51
lines changed

crates/hermesllm/src/apis/openai_responses.rs

Lines changed: 55 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -113,25 +113,48 @@ pub struct ResponsesAPIRequest {
113113
pub enum InputParam {
114114
/// Simple text input
115115
Text(String),
116-
/// Array of input items
116+
/// Array of input items (messages, references, outputs, etc.)
117117
Items(Vec<InputItem>),
118118
}
119119

120-
/// Input item discriminated by type
120+
/// Input item - can be a message, item reference, function call output, etc.
121121
#[derive(Debug, Clone, Serialize, Deserialize)]
122-
#[serde(tag = "type", rename_all = "snake_case")]
122+
#[serde(untagged)]
123123
pub enum InputItem {
124-
/// Input message
124+
/// Input message (role + content)
125125
Message(InputMessage),
126+
/// Item reference
127+
ItemReference {
128+
#[serde(rename = "type")]
129+
item_type: String,
130+
id: String,
131+
},
132+
/// Function call output
133+
FunctionCallOutput {
134+
#[serde(rename = "type")]
135+
item_type: String,
136+
call_id: String,
137+
output: String,
138+
},
126139
}
127140

128141
/// Input message with role and content
129142
#[derive(Debug, Clone, Serialize, Deserialize)]
130143
pub struct InputMessage {
131144
/// Message role
132145
pub role: MessageRole,
133-
/// Message content
134-
pub content: Vec<InputContent>,
146+
/// Message content - can be a string or array of InputContent
147+
pub content: MessageContent,
148+
}
149+
150+
/// Message content - can be either a simple string or array of content items
151+
#[derive(Debug, Clone, Serialize, Deserialize)]
152+
#[serde(untagged)]
153+
pub enum MessageContent {
154+
/// Simple text content
155+
Text(String),
156+
/// Array of content items
157+
Items(Vec<InputContent>),
135158
}
136159

137160
/// Message roles
@@ -1025,16 +1048,23 @@ impl ProviderRequest for ResponsesAPIRequest {
10251048
items.iter().fold(String::new(), |acc, item| {
10261049
match item {
10271050
InputItem::Message(msg) => {
1028-
let content_text = msg.content.iter().fold(String::new(), |acc, content| {
1029-
acc + " " + &match content {
1030-
InputContent::InputText { text } => text.clone(),
1031-
InputContent::InputImage { .. } => "[Image]".to_string(),
1032-
InputContent::InputFile { .. } => "[File]".to_string(),
1033-
InputContent::InputAudio { .. } => "[Audio]".to_string(),
1051+
let content_text = match &msg.content {
1052+
MessageContent::Text(text) => text.clone(),
1053+
MessageContent::Items(content_items) => {
1054+
content_items.iter().fold(String::new(), |acc, content| {
1055+
acc + " " + &match content {
1056+
InputContent::InputText { text } => text.clone(),
1057+
InputContent::InputImage { .. } => "[Image]".to_string(),
1058+
InputContent::InputFile { .. } => "[File]".to_string(),
1059+
InputContent::InputAudio { .. } => "[Audio]".to_string(),
1060+
}
1061+
})
10341062
}
1035-
});
1063+
};
10361064
acc + " " + &content_text
10371065
}
1066+
// Skip non-message items (references, outputs, etc.)
1067+
_ => acc,
10381068
}
10391069
})
10401070
}
@@ -1048,14 +1078,20 @@ impl ProviderRequest for ResponsesAPIRequest {
10481078
items.iter().rev().find_map(|item| {
10491079
match item {
10501080
InputItem::Message(msg) if matches!(msg.role, MessageRole::User) => {
1051-
// Extract text from the first text content
1052-
msg.content.iter().find_map(|content| {
1053-
match content {
1054-
InputContent::InputText { text } => Some(text.clone()),
1055-
_ => None,
1081+
// Extract text from content
1082+
match &msg.content {
1083+
MessageContent::Text(text) => Some(text.clone()),
1084+
MessageContent::Items(content_items) => {
1085+
content_items.iter().find_map(|content| {
1086+
match content {
1087+
InputContent::InputText { text } => Some(text.clone()),
1088+
_ => None,
1089+
}
1090+
})
10561091
}
1057-
})
1092+
}
10581093
}
1094+
// Skip non-message items
10591095
_ => None,
10601096
}
10611097
})

crates/hermesllm/src/transforms/request/from_openai.rs

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -291,15 +291,43 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
291291
MessageRole::Developer => Role::System, // Map developer to system
292292
};
293293

294-
// Convert content blocks
295-
let content = if input_msg.content.len() == 1 {
296-
// Single content item - check if it's simple text
297-
match &input_msg.content[0] {
298-
InputContent::InputText { text } => MessageContent::Text(text.clone()),
299-
_ => {
300-
// Convert to parts for non-text content
294+
// Convert content based on MessageContent type
295+
let content = match &input_msg.content {
296+
crate::apis::openai_responses::MessageContent::Text(text) => {
297+
// Simple text content
298+
MessageContent::Text(text.clone())
299+
}
300+
crate::apis::openai_responses::MessageContent::Items(content_items) => {
301+
// Check if it's a single text item (can use simple text format)
302+
if content_items.len() == 1 {
303+
if let InputContent::InputText { text } = &content_items[0] {
304+
MessageContent::Text(text.clone())
305+
} else {
306+
// Single non-text item - use parts format
307+
MessageContent::Parts(
308+
content_items.iter()
309+
.filter_map(|c| match c {
310+
InputContent::InputText { text } => {
311+
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
312+
}
313+
InputContent::InputImage { image_url, .. } => {
314+
Some(crate::apis::openai::ContentPart::ImageUrl {
315+
image_url: crate::apis::openai::ImageUrl {
316+
url: image_url.clone(),
317+
detail: None,
318+
}
319+
})
320+
}
321+
InputContent::InputFile { .. } => None, // Skip files for now
322+
InputContent::InputAudio { .. } => None, // Skip audio for now
323+
})
324+
.collect()
325+
)
326+
}
327+
} else {
328+
// Multiple content items - convert to parts
301329
MessageContent::Parts(
302-
input_msg.content.iter()
330+
content_items.iter()
303331
.filter_map(|c| match c {
304332
InputContent::InputText { text } => {
305333
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
@@ -319,27 +347,6 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
319347
)
320348
}
321349
}
322-
} else {
323-
// Multiple content items - convert to parts
324-
MessageContent::Parts(
325-
input_msg.content.iter()
326-
.filter_map(|c| match c {
327-
InputContent::InputText { text } => {
328-
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
329-
}
330-
InputContent::InputImage { image_url, .. } => {
331-
Some(crate::apis::openai::ContentPart::ImageUrl {
332-
image_url: crate::apis::openai::ImageUrl {
333-
url: image_url.clone(),
334-
detail: None,
335-
}
336-
})
337-
}
338-
InputContent::InputFile { .. } => None, // Skip files for now
339-
InputContent::InputAudio { .. } => None, // Skip audio for now
340-
})
341-
.collect()
342-
)
343350
};
344351

345352
converted_messages.push(Message {
@@ -350,6 +357,9 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
350357
tool_calls: None,
351358
});
352359
}
360+
// Skip non-message items (references, outputs) for now
361+
// These would need special handling in chat completions format
362+
_ => {}
353363
}
354364
}
355365

demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,3 @@ model_aliases:
8989
# Alias for grok testing
9090
arch.grok.v1:
9191
target: grok-4-0709
92-
93-
tracing:
94-
random_sampling: 100

tests/e2e/test_openai_responses_api_client.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,3 +628,44 @@ def test_openai_responses_api_streaming_with_tools_upstream_anthropic():
628628
assert (
629629
full_text or tool_calls
630630
), "Expected streamed text or tool call argument deltas from Responses tools stream"
631+
632+
633+
def test_openai_responses_api_mixed_content_types():
634+
"""Test Responses API with mixed content types (string and array) in input messages"""
635+
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
636+
client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")
637+
638+
# This test mimics the request that was failing:
639+
# One message with string content, another with array content
640+
resp = client.responses.create(
641+
model="arch.title.v1",
642+
input=[
643+
{
644+
"role": "developer",
645+
"content": "Generate a very short chat title (2-5 words max) based on the user's message.\n"
646+
"Rules:\n"
647+
"- Maximum 30 characters\n"
648+
"- No quotes, colons, hashtags, or markdown\n"
649+
"- Just the topic/intent, not a full sentence\n"
650+
'- If the message is a greeting like "hi" or "hello", respond with just "New conversation"\n'
651+
'- Be concise: "Weather in NYC" not "User asking about the weather in New York City"',
652+
},
653+
{
654+
"role": "user",
655+
"content": [
656+
{"type": "input_text", "text": "What is the weather in Seattle"}
657+
],
658+
},
659+
],
660+
)
661+
662+
# Print the response
663+
print(f"\n{'='*80}")
664+
print(f"Model: {resp.model}")
665+
print(f"Output: {resp.output_text}")
666+
print(f"{'='*80}\n")
667+
668+
assert resp is not None
669+
assert resp.id is not None
670+
# Verify we got a reasonable title
671+
assert len(resp.output_text) > 0

0 commit comments

Comments (0)