Skip to content

Commit 6d8246a

Browse files
authored
[gpt-oss] Add ResponseReasoningPartAddedEvent, ResponseReasoningPartDoneEvent for streaming (#24938)
Signed-off-by: Andrew Xia <[email protected]>
1 parent 9d1c50a commit 6d8246a

File tree

3 files changed

+143
-33
lines changed

3 files changed

+143
-33
lines changed

tests/entrypoints/openai/test_response_api_with_harmony.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,57 @@ async def test_stateful_multi_turn(client: OpenAI, model_name: str):
287287
assert response3.status == "completed"
288288

289289

290+
@pytest.mark.asyncio
291+
@pytest.mark.parametrize("model_name", [MODEL_NAME])
292+
async def test_streaming_types(client: OpenAI, model_name: str):
293+
prompts = [
294+
"tell me a story about a cat in 20 words",
295+
]
296+
297+
# this links the "done" type with the "start" type
298+
# so every "done" type should have a corresponding "start" type
299+
# and every open block should be closed by the end of the stream
300+
pairs_of_event_types = {
301+
"response.completed": "response.created",
302+
"response.output_item.done": "response.output_item.added",
303+
"response.content_part.done": "response.content_part.added",
304+
"response.output_text.done": "response.output_text.delta",
305+
"response.web_search_call.done": "response.web_search_call.added",
306+
"response.reasoning_text.done": "response.reasoning_text.delta",
307+
"response.reasoning_part.done": "response.reasoning_part.added",
308+
}
309+
310+
for prompt in prompts:
311+
response = await client.responses.create(
312+
model=model_name,
313+
input=prompt,
314+
reasoning={"effort": "low"},
315+
tools=[],
316+
stream=True,
317+
background=False,
318+
)
319+
320+
stack_of_event_types = []
321+
async for event in response:
322+
if event.type == 'response.created':
323+
stack_of_event_types.append(event.type)
324+
elif event.type == 'response.completed':
325+
assert stack_of_event_types[-1] == pairs_of_event_types[
326+
event.type]
327+
stack_of_event_types.pop()
328+
if event.type.endswith("added"):
329+
stack_of_event_types.append(event.type)
330+
elif event.type.endswith("delta"):
331+
if stack_of_event_types[-1] == event.type:
332+
continue
333+
stack_of_event_types.append(event.type)
334+
elif event.type.endswith("done"):
335+
assert stack_of_event_types[-1] == pairs_of_event_types[
336+
event.type]
337+
stack_of_event_types.pop()
338+
assert len(stack_of_event_types) == 0
339+
340+
290341
@pytest.mark.asyncio
291342
@pytest.mark.parametrize("model_name", [MODEL_NAME])
292343
@pytest.mark.parametrize("background", [True, False])
@@ -343,7 +394,10 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
343394
assert event.item_id == current_item_id
344395

345396
# verify content_index_id is correct
346-
if event.type == "response.content_part.added":
397+
if event.type in [
398+
"response.content_part.added",
399+
"response.reasoning_part.added"
400+
]:
347401
assert event.content_index != current_content_index
348402
current_content_index = event.content_index
349403
elif event.type in [

vllm/entrypoints/openai/protocol.py

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
3232
ResponseStatus, ResponseWebSearchCallCompletedEvent,
3333
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
34+
from openai.types.responses.response_reasoning_item import (
35+
Content as ResponseReasoningTextContent)
3436

3537
# Backward compatibility for OpenAI client versions
3638
try: # For older openai versions (< 1.100.0)
@@ -260,26 +262,6 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
260262
ResponseReasoningItem,
261263
ResponseFunctionToolCall]
262264

263-
StreamingResponsesResponse: TypeAlias = Union[
264-
ResponseCreatedEvent,
265-
ResponseInProgressEvent,
266-
ResponseCompletedEvent,
267-
ResponseOutputItemAddedEvent,
268-
ResponseOutputItemDoneEvent,
269-
ResponseContentPartAddedEvent,
270-
ResponseContentPartDoneEvent,
271-
ResponseReasoningTextDeltaEvent,
272-
ResponseReasoningTextDoneEvent,
273-
ResponseCodeInterpreterCallInProgressEvent,
274-
ResponseCodeInterpreterCallCodeDeltaEvent,
275-
ResponseWebSearchCallInProgressEvent,
276-
ResponseWebSearchCallSearchingEvent,
277-
ResponseWebSearchCallCompletedEvent,
278-
ResponseCodeInterpreterCallCodeDoneEvent,
279-
ResponseCodeInterpreterCallInterpretingEvent,
280-
ResponseCodeInterpreterCallCompletedEvent,
281-
]
282-
283265

284266
class ResponsesRequest(OpenAIBaseModel):
285267
# Ordered by official OpenAI API documentation
@@ -1916,6 +1898,72 @@ def from_request(
19161898
)
19171899

19181900

1901+
# TODO: this code can be removed once
1902+
# https://github.com/openai/openai-python/issues/2634 has been resolved
1903+
class ResponseReasoningPartDoneEvent(OpenAIBaseModel):
1904+
content_index: int
1905+
"""The index of the content part that is done."""
1906+
1907+
item_id: str
1908+
"""The ID of the output item that the content part was added to."""
1909+
1910+
output_index: int
1911+
"""The index of the output item that the content part was added to."""
1912+
1913+
part: ResponseReasoningTextContent
1914+
"""The content part that is done."""
1915+
1916+
sequence_number: int
1917+
"""The sequence number of this event."""
1918+
1919+
type: Literal["response.reasoning_part.done"]
1920+
"""The type of the event. Always `response.reasoning_part.done`."""
1921+
1922+
1923+
# TODO: this code can be removed once
1924+
# https://github.com/openai/openai-python/issues/2634 has been resolved
1925+
class ResponseReasoningPartAddedEvent(OpenAIBaseModel):
1926+
content_index: int
1927+
"""The index of the content part that was added."""
1928+
1929+
item_id: str
1930+
"""The ID of the output item that the content part was added to."""
1931+
1932+
output_index: int
1933+
"""The index of the output item that the content part was added to."""
1934+
1935+
part: ResponseReasoningTextContent
1936+
"""The content part that was added."""
1937+
1938+
sequence_number: int
1939+
"""The sequence number of this event."""
1940+
1941+
type: Literal["response.reasoning_part.added"]
1942+
"""The type of the event. Always `response.reasoning_part.added`."""
1943+
1944+
1945+
StreamingResponsesResponse: TypeAlias = Union[
1946+
ResponseCreatedEvent,
1947+
ResponseInProgressEvent,
1948+
ResponseCompletedEvent,
1949+
ResponseOutputItemAddedEvent,
1950+
ResponseOutputItemDoneEvent,
1951+
ResponseContentPartAddedEvent,
1952+
ResponseContentPartDoneEvent,
1953+
ResponseReasoningTextDeltaEvent,
1954+
ResponseReasoningTextDoneEvent,
1955+
ResponseReasoningPartAddedEvent,
1956+
ResponseReasoningPartDoneEvent,
1957+
ResponseCodeInterpreterCallInProgressEvent,
1958+
ResponseCodeInterpreterCallCodeDeltaEvent,
1959+
ResponseWebSearchCallInProgressEvent,
1960+
ResponseWebSearchCallSearchingEvent,
1961+
ResponseWebSearchCallCompletedEvent,
1962+
ResponseCodeInterpreterCallCodeDoneEvent,
1963+
ResponseCodeInterpreterCallInterpretingEvent,
1964+
ResponseCodeInterpreterCallCompletedEvent,
1965+
]
1966+
19191967
BatchRequestInputBody = Union[ChatCompletionRequest, EmbeddingRequest,
19201968
ScoreRequest, RerankRequest]
19211969

vllm/entrypoints/openai/serving_responses.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
InputTokensDetails,
5959
OutputTokensDetails,
6060
RequestResponseMetadata,
61+
ResponseReasoningPartAddedEvent,
62+
ResponseReasoningPartDoneEvent,
6163
ResponsesRequest,
6264
ResponsesResponse, ResponseUsage,
6365
StreamingResponsesResponse)
@@ -1280,14 +1282,13 @@ async def _process_harmony_streaming_events(
12801282
# Deal with tool call here
12811283
pass
12821284
elif previous_item.channel == "analysis":
1285+
content = ResponseReasoningTextContent(
1286+
text=previous_item.content[0].text,
1287+
type="reasoning_text",
1288+
)
12831289
reasoning_item = ResponseReasoningItem(
12841290
type="reasoning",
1285-
content=[
1286-
ResponseReasoningTextContent(
1287-
text=previous_item.content[0].text,
1288-
type="reasoning_text",
1289-
),
1290-
],
1291+
content=[content],
12911292
status="completed",
12921293
id=current_item_id,
12931294
summary=[],
@@ -1301,6 +1302,15 @@ async def _process_harmony_streaming_events(
13011302
content_index=current_content_index,
13021303
text=previous_item.content[0].text,
13031304
))
1305+
yield _increment_sequence_number_and_return(
1306+
ResponseReasoningPartDoneEvent(
1307+
type="response.reasoning_part.done",
1308+
sequence_number=-1,
1309+
item_id=current_item_id,
1310+
output_index=current_output_index,
1311+
content_index=current_content_index,
1312+
part=content,
1313+
))
13041314
yield _increment_sequence_number_and_return(
13051315
ResponseOutputItemDoneEvent(
13061316
type="response.output_item.done",
@@ -1412,17 +1422,15 @@ async def _process_harmony_streaming_events(
14121422
))
14131423
current_content_index += 1
14141424
yield _increment_sequence_number_and_return(
1415-
ResponseContentPartAddedEvent(
1416-
type="response.content_part.added",
1425+
ResponseReasoningPartAddedEvent(
1426+
type="response.reasoning_part.added",
14171427
sequence_number=-1,
14181428
output_index=current_output_index,
14191429
item_id=current_item_id,
14201430
content_index=current_content_index,
1421-
part=ResponseOutputText(
1422-
type="output_text",
1431+
part=ResponseReasoningTextContent(
14231432
text="",
1424-
annotations=[],
1425-
logprobs=[],
1433+
type="reasoning_text",
14261434
),
14271435
))
14281436
yield _increment_sequence_number_and_return(

0 commit comments

Comments
 (0)