[gpt-oss] Add ResponseReasoningPartAddedEvent, ResponseReasoningPartDoneEvent for streaming (#24938)

Signed-off-by: Andrew Xia <axia@meta.com>
This commit is contained in:
Andrew Xia 2025-09-18 19:11:59 -07:00 committed by GitHub
parent 9d1c50a5ac
commit 6d8246aaff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 143 additions and 33 deletions

View File

@ -287,6 +287,57 @@ async def test_stateful_multi_turn(client: OpenAI, model_name: str):
assert response3.status == "completed" assert response3.status == "completed"
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_streaming_types(client: OpenAI, model_name: str):
    """Check that streaming events are properly bracketed.

    Every closing event ("done"/"completed") must match the most recently
    opened event ("added"/"created"/"delta"), and no block may remain open
    once the stream ends.
    """
    prompts = [
        "tell me a story about a cat in 20 words",
    ]
    # Maps each closing event type to the opening event type it pairs with:
    # every "done" must have a corresponding "start", and every open block
    # must be closed by the end of the stream.
    pairs_of_event_types = {
        "response.completed": "response.created",
        "response.output_item.done": "response.output_item.added",
        "response.content_part.done": "response.content_part.added",
        "response.output_text.done": "response.output_text.delta",
        "response.web_search_call.done": "response.web_search_call.added",
        "response.reasoning_text.done": "response.reasoning_text.delta",
        "response.reasoning_part.done": "response.reasoning_part.added",
    }
    for prompt in prompts:
        stream = await client.responses.create(
            model=model_name,
            input=prompt,
            reasoning={"effort": "low"},
            tools=[],
            stream=True,
            background=False,
        )
        open_blocks: list[str] = []
        async for event in stream:
            event_type = event.type
            if event_type == "response.created":
                open_blocks.append(event_type)
            elif event_type == "response.completed":
                assert open_blocks[-1] == pairs_of_event_types[event_type]
                open_blocks.pop()
            if event_type.endswith("added"):
                open_blocks.append(event_type)
            elif event_type.endswith("delta"):
                # Consecutive deltas of the same type count as one open block.
                if open_blocks[-1] != event_type:
                    open_blocks.append(event_type)
            elif event_type.endswith("done"):
                assert open_blocks[-1] == pairs_of_event_types[event_type]
                open_blocks.pop()
        assert not open_blocks
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("background", [True, False]) @pytest.mark.parametrize("background", [True, False])
@ -343,7 +394,10 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
assert event.item_id == current_item_id assert event.item_id == current_item_id
# verify content_index_id is correct # verify content_index_id is correct
if event.type == "response.content_part.added": if event.type in [
"response.content_part.added",
"response.reasoning_part.added"
]:
assert event.content_index != current_content_index assert event.content_index != current_content_index
current_content_index = event.content_index current_content_index = event.content_index
elif event.type in [ elif event.type in [

View File

@ -31,6 +31,8 @@ from openai.types.responses import (
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent, ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
ResponseStatus, ResponseWebSearchCallCompletedEvent, ResponseStatus, ResponseWebSearchCallCompletedEvent,
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent) ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
from openai.types.responses.response_reasoning_item import (
Content as ResponseReasoningTextContent)
# Backward compatibility for OpenAI client versions # Backward compatibility for OpenAI client versions
try: # For older openai versions (< 1.100.0) try: # For older openai versions (< 1.100.0)
@ -260,26 +262,6 @@ ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
ResponseReasoningItem, ResponseReasoningItem,
ResponseFunctionToolCall] ResponseFunctionToolCall]
StreamingResponsesResponse: TypeAlias = Union[
ResponseCreatedEvent,
ResponseInProgressEvent,
ResponseCompletedEvent,
ResponseOutputItemAddedEvent,
ResponseOutputItemDoneEvent,
ResponseContentPartAddedEvent,
ResponseContentPartDoneEvent,
ResponseReasoningTextDeltaEvent,
ResponseReasoningTextDoneEvent,
ResponseCodeInterpreterCallInProgressEvent,
ResponseCodeInterpreterCallCodeDeltaEvent,
ResponseWebSearchCallInProgressEvent,
ResponseWebSearchCallSearchingEvent,
ResponseWebSearchCallCompletedEvent,
ResponseCodeInterpreterCallCodeDoneEvent,
ResponseCodeInterpreterCallInterpretingEvent,
ResponseCodeInterpreterCallCompletedEvent,
]
class ResponsesRequest(OpenAIBaseModel): class ResponsesRequest(OpenAIBaseModel):
# Ordered by official OpenAI API documentation # Ordered by official OpenAI API documentation
@ -1916,6 +1898,72 @@ class ResponsesResponse(OpenAIBaseModel):
) )
# TODO: this code can be removed once
# https://github.com/openai/openai-python/issues/2634 has been resolved
class ResponseReasoningPartDoneEvent(OpenAIBaseModel):
    """Streaming event emitted when a reasoning content part is finished.

    Local stand-in for the upstream `response.reasoning_part.done` event,
    which the installed openai SDK does not yet define (see TODO above).
    """

    content_index: int
    """The index of the content part that is done."""

    item_id: str
    """The ID of the output item that the content part was added to."""

    output_index: int
    """The index of the output item that the content part was added to."""

    part: ResponseReasoningTextContent
    """The content part that is done."""

    sequence_number: int
    """The sequence number of this event."""

    type: Literal["response.reasoning_part.done"]
    """The type of the event. Always `response.reasoning_part.done`."""
# TODO: this code can be removed once
# https://github.com/openai/openai-python/issues/2634 has been resolved
class ResponseReasoningPartAddedEvent(OpenAIBaseModel):
    """Streaming event emitted when a new reasoning content part is opened.

    Local stand-in for the upstream `response.reasoning_part.added` event,
    which the installed openai SDK does not yet define (see TODO above).
    """

    content_index: int
    """The index of the content part that was added."""

    item_id: str
    """The ID of the output item that the content part was added to."""

    output_index: int
    """The index of the output item that the content part was added to."""

    part: ResponseReasoningTextContent
    """The content part that was added."""

    sequence_number: int
    """The sequence number of this event."""

    type: Literal["response.reasoning_part.added"]
    """The type of the event. Always `response.reasoning_part.added`."""
# Union of every event type the Responses streaming endpoint can yield.
# NOTE(review): member order is kept as-is — pydantic may try union members
# left-to-right when validating, so reordering could change which model a
# payload is coerced into; confirm before rearranging.
StreamingResponsesResponse: TypeAlias = Union[
    ResponseCreatedEvent,
    ResponseInProgressEvent,
    ResponseCompletedEvent,
    ResponseOutputItemAddedEvent,
    ResponseOutputItemDoneEvent,
    ResponseContentPartAddedEvent,
    ResponseContentPartDoneEvent,
    ResponseReasoningTextDeltaEvent,
    ResponseReasoningTextDoneEvent,
    ResponseReasoningPartAddedEvent,
    ResponseReasoningPartDoneEvent,
    ResponseCodeInterpreterCallInProgressEvent,
    ResponseCodeInterpreterCallCodeDeltaEvent,
    ResponseWebSearchCallInProgressEvent,
    ResponseWebSearchCallSearchingEvent,
    ResponseWebSearchCallCompletedEvent,
    ResponseCodeInterpreterCallCodeDoneEvent,
    ResponseCodeInterpreterCallInterpretingEvent,
    ResponseCodeInterpreterCallCompletedEvent,
]
BatchRequestInputBody = Union[ChatCompletionRequest, EmbeddingRequest, BatchRequestInputBody = Union[ChatCompletionRequest, EmbeddingRequest,
ScoreRequest, RerankRequest] ScoreRequest, RerankRequest]

View File

@ -58,6 +58,8 @@ from vllm.entrypoints.openai.protocol import (DeltaMessage, ErrorResponse,
InputTokensDetails, InputTokensDetails,
OutputTokensDetails, OutputTokensDetails,
RequestResponseMetadata, RequestResponseMetadata,
ResponseReasoningPartAddedEvent,
ResponseReasoningPartDoneEvent,
ResponsesRequest, ResponsesRequest,
ResponsesResponse, ResponseUsage, ResponsesResponse, ResponseUsage,
StreamingResponsesResponse) StreamingResponsesResponse)
@ -1280,14 +1282,13 @@ class OpenAIServingResponses(OpenAIServing):
# Deal with tool call here # Deal with tool call here
pass pass
elif previous_item.channel == "analysis": elif previous_item.channel == "analysis":
content = ResponseReasoningTextContent(
text=previous_item.content[0].text,
type="reasoning_text",
)
reasoning_item = ResponseReasoningItem( reasoning_item = ResponseReasoningItem(
type="reasoning", type="reasoning",
content=[ content=[content],
ResponseReasoningTextContent(
text=previous_item.content[0].text,
type="reasoning_text",
),
],
status="completed", status="completed",
id=current_item_id, id=current_item_id,
summary=[], summary=[],
@ -1301,6 +1302,15 @@ class OpenAIServingResponses(OpenAIServing):
content_index=current_content_index, content_index=current_content_index,
text=previous_item.content[0].text, text=previous_item.content[0].text,
)) ))
yield _increment_sequence_number_and_return(
ResponseReasoningPartDoneEvent(
type="response.reasoning_part.done",
sequence_number=-1,
item_id=current_item_id,
output_index=current_output_index,
content_index=current_content_index,
part=content,
))
yield _increment_sequence_number_and_return( yield _increment_sequence_number_and_return(
ResponseOutputItemDoneEvent( ResponseOutputItemDoneEvent(
type="response.output_item.done", type="response.output_item.done",
@ -1412,17 +1422,15 @@ class OpenAIServingResponses(OpenAIServing):
)) ))
current_content_index += 1 current_content_index += 1
yield _increment_sequence_number_and_return( yield _increment_sequence_number_and_return(
ResponseContentPartAddedEvent( ResponseReasoningPartAddedEvent(
type="response.content_part.added", type="response.reasoning_part.added",
sequence_number=-1, sequence_number=-1,
output_index=current_output_index, output_index=current_output_index,
item_id=current_item_id, item_id=current_item_id,
content_index=current_content_index, content_index=current_content_index,
part=ResponseOutputText( part=ResponseReasoningTextContent(
type="output_text",
text="", text="",
annotations=[], type="reasoning_text",
logprobs=[],
), ),
)) ))
yield _increment_sequence_number_and_return( yield _increment_sequence_number_and_return(