mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 02:05:01 +08:00
[gpt-oss] Add ResponseReasoningPartAddedEvent, ResponseReasoningPartDoneEvent for streaming (#24938)
Signed-off-by: Andrew Xia <axia@meta.com>
This commit is contained in:
parent
9d1c50a5ac
commit
6d8246aaff
@ -287,6 +287,57 @@ async def test_stateful_multi_turn(client: OpenAI, model_name: str):
|
|||||||
assert response3.status == "completed"
|
assert response3.status == "completed"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_streaming_types(client: OpenAI, model_name: str):
    """Check that streamed Responses events open and close in matched pairs.

    The event stream is treated like a sequence of brackets: every opening
    event (``response.created``, ``*.added``, or the first ``*.delta`` of a
    run) is pushed on a stack, and every closing event
    (``response.completed`` or ``*.done``) must match the opening type on top
    of the stack. Once the stream ends, the stack must be empty.

    Args:
        client: Client fixture used to issue the streaming request.
        model_name: Name of the model to query.
    """
    prompts = [
        "tell me a story about a cat in 20 words",
    ]

    # Maps each closing ("done") event type to the opening ("start") event
    # type it must match. Every closing event needs an entry here, and every
    # opened block must be closed by the end of the stream.
    pairs_of_event_types = {
        "response.completed": "response.created",
        "response.output_item.done": "response.output_item.added",
        "response.content_part.done": "response.content_part.added",
        "response.output_text.done": "response.output_text.delta",
        "response.web_search_call.done": "response.web_search_call.added",
        "response.reasoning_text.done": "response.reasoning_text.delta",
        "response.reasoning_part.done": "response.reasoning_part.added",
    }

    for prompt in prompts:
        response = await client.responses.create(
            model=model_name,
            input=prompt,
            reasoning={"effort": "low"},
            tools=[],
            stream=True,
            background=False,
        )

        stack_of_event_types = []
        async for event in response:
            event_type = event.type
            is_opening = (event_type == "response.created"
                          or event_type.endswith("added"))
            is_closing = (event_type == "response.completed"
                          or event_type.endswith("done"))

            if is_opening:
                stack_of_event_types.append(event_type)
            elif event_type.endswith("delta"):
                # A delta is only valid inside an already-open block.
                assert stack_of_event_types, (
                    f"{event_type!r} received before any block was opened")
                # Consecutive deltas of the same type belong to one run;
                # only the first one opens a block.
                if stack_of_event_types[-1] != event_type:
                    stack_of_event_types.append(event_type)
            elif is_closing:
                assert event_type in pairs_of_event_types, (
                    f"no opening event type registered for {event_type!r}")
                assert stack_of_event_types, (
                    f"{event_type!r} received with no open block")
                expected_opening = pairs_of_event_types[event_type]
                assert stack_of_event_types[-1] == expected_opening, (
                    f"{event_type!r} closed {stack_of_event_types[-1]!r}, "
                    f"expected {expected_opening!r}")
                stack_of_event_types.pop()
            # Any other event type (neither opening, delta nor closing) is
            # ignored by this check.

        assert not stack_of_event_types, (
            f"unclosed event blocks at end of stream: {stack_of_event_types}")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||||
@pytest.mark.parametrize("background", [True, False])
|
@pytest.mark.parametrize("background", [True, False])
|
||||||
@ -343,7 +394,10 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
|
|||||||
assert event.item_id == current_item_id
|
assert event.item_id == current_item_id
|
||||||
|
|
||||||
# verify content_index_id is correct
|
# verify content_index_id is correct
|
||||||
if event.type == "response.content_part.added":
|
if event.type in [
|
||||||
|
"response.content_part.added",
|
||||||
|
"response.reasoning_part.added"
|
||||||
|
]:
|
||||||
assert event.content_index != current_content_index
|
assert event.content_index != current_content_index
|
||||||
current_content_index = event.content_index
|
current_content_index = event.content_index
|
||||||
elif event.type in [
|
elif event.type in [
|
||||||
|
|||||||
@ -31,6 +31,8 @@ from openai.types.responses import (
|
|||||||
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
|
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
|
||||||
ResponseStatus, ResponseWebSearchCallCompletedEvent,
|
ResponseStatus, ResponseWebSearchCallCompletedEvent,
|
||||||
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
|
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
|
||||||
|
from openai.types.responses.response_reasoning_item import (
|
||||||
|
Content as ResponseReasoningTextContent)
|
||||||
|
|
||||||
# Backward compatibility for OpenAI client versions
|
# Backward compatibility for OpenAI client versions
|
||||||
try: # For older openai versions (< 1.100.0)
|
try: # For older openai versions (< 1.100.0)
|
||||||
@ -260,26 +262,6 @@ ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
|
|||||||
ResponseReasoningItem,
|
ResponseReasoningItem,
|
||||||
ResponseFunctionToolCall]
|
ResponseFunctionToolCall]
|
||||||
|
|
||||||
StreamingResponsesResponse: TypeAlias = Union[
|
|
||||||
ResponseCreatedEvent,
|
|
||||||
ResponseInProgressEvent,
|
|
||||||
ResponseCompletedEvent,
|
|
||||||
ResponseOutputItemAddedEvent,
|
|
||||||
ResponseOutputItemDoneEvent,
|
|
||||||
ResponseContentPartAddedEvent,
|
|
||||||
ResponseContentPartDoneEvent,
|
|
||||||
ResponseReasoningTextDeltaEvent,
|
|
||||||
ResponseReasoningTextDoneEvent,
|
|
||||||
ResponseCodeInterpreterCallInProgressEvent,
|
|
||||||
ResponseCodeInterpreterCallCodeDeltaEvent,
|
|
||||||
ResponseWebSearchCallInProgressEvent,
|
|
||||||
ResponseWebSearchCallSearchingEvent,
|
|
||||||
ResponseWebSearchCallCompletedEvent,
|
|
||||||
ResponseCodeInterpreterCallCodeDoneEvent,
|
|
||||||
ResponseCodeInterpreterCallInterpretingEvent,
|
|
||||||
ResponseCodeInterpreterCallCompletedEvent,
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class ResponsesRequest(OpenAIBaseModel):
|
class ResponsesRequest(OpenAIBaseModel):
|
||||||
# Ordered by official OpenAI API documentation
|
# Ordered by official OpenAI API documentation
|
||||||
@ -1916,6 +1898,72 @@ class ResponsesResponse(OpenAIBaseModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: this code can be removed once
|
||||||
|
# https://github.com/openai/openai-python/issues/2634 has been resolved
|
||||||
|
class ResponseReasoningPartDoneEvent(OpenAIBaseModel):
    """Streaming event signalling that a reasoning content part is finished.

    Defined locally; per the TODO above this class it can be dropped once the
    referenced openai-python issue is resolved.
    """

    content_index: int
    """Index of the content part that has finished."""

    item_id: str
    """ID of the output item that contains the content part."""

    output_index: int
    """Index of the output item that contains the content part."""

    part: ResponseReasoningTextContent
    """The finished content part."""

    sequence_number: int
    """Sequence number of this event within the stream."""

    type: Literal["response.reasoning_part.done"]
    """Event type discriminator. Always `response.reasoning_part.done`."""
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: this code can be removed once
|
||||||
|
# https://github.com/openai/openai-python/issues/2634 has been resolved
|
||||||
|
class ResponseReasoningPartAddedEvent(OpenAIBaseModel):
    """Streaming event signalling that a reasoning content part was added.

    Counterpart of ``ResponseReasoningPartDoneEvent``. Defined locally; per
    the TODO above this class it can be dropped once the referenced
    openai-python issue is resolved.
    """

    content_index: int
    """The index of the content part that was added."""

    item_id: str
    """The ID of the output item that the content part was added to."""

    output_index: int
    """The index of the output item that the content part was added to."""

    part: ResponseReasoningTextContent
    """The content part that was added."""

    sequence_number: int
    """The sequence number of this event."""

    type: Literal["response.reasoning_part.added"]
    """The type of the event. Always `response.reasoning_part.added`."""
|
||||||
|
|
||||||
|
|
||||||
|
# Union of the event models listed below for the streaming Responses path.
# It is declared after ResponseReasoningPartAddedEvent and
# ResponseReasoningPartDoneEvent because it names both classes as members.
# NOTE(review): member order is kept as-is; confirm whether validation
# depends on it before reordering.
StreamingResponsesResponse: TypeAlias = Union[
|
||||||
|
ResponseCreatedEvent,
|
||||||
|
ResponseInProgressEvent,
|
||||||
|
ResponseCompletedEvent,
|
||||||
|
ResponseOutputItemAddedEvent,
|
||||||
|
ResponseOutputItemDoneEvent,
|
||||||
|
ResponseContentPartAddedEvent,
|
||||||
|
ResponseContentPartDoneEvent,
|
||||||
|
ResponseReasoningTextDeltaEvent,
|
||||||
|
ResponseReasoningTextDoneEvent,
|
||||||
|
ResponseReasoningPartAddedEvent,
|
||||||
|
ResponseReasoningPartDoneEvent,
|
||||||
|
ResponseCodeInterpreterCallInProgressEvent,
|
||||||
|
ResponseCodeInterpreterCallCodeDeltaEvent,
|
||||||
|
ResponseWebSearchCallInProgressEvent,
|
||||||
|
ResponseWebSearchCallSearchingEvent,
|
||||||
|
ResponseWebSearchCallCompletedEvent,
|
||||||
|
ResponseCodeInterpreterCallCodeDoneEvent,
|
||||||
|
ResponseCodeInterpreterCallInterpretingEvent,
|
||||||
|
ResponseCodeInterpreterCallCompletedEvent,
|
||||||
|
]
|
||||||
|
|
||||||
BatchRequestInputBody = Union[ChatCompletionRequest, EmbeddingRequest,
|
BatchRequestInputBody = Union[ChatCompletionRequest, EmbeddingRequest,
|
||||||
ScoreRequest, RerankRequest]
|
ScoreRequest, RerankRequest]
|
||||||
|
|
||||||
|
|||||||
@ -58,6 +58,8 @@ from vllm.entrypoints.openai.protocol import (DeltaMessage, ErrorResponse,
|
|||||||
InputTokensDetails,
|
InputTokensDetails,
|
||||||
OutputTokensDetails,
|
OutputTokensDetails,
|
||||||
RequestResponseMetadata,
|
RequestResponseMetadata,
|
||||||
|
ResponseReasoningPartAddedEvent,
|
||||||
|
ResponseReasoningPartDoneEvent,
|
||||||
ResponsesRequest,
|
ResponsesRequest,
|
||||||
ResponsesResponse, ResponseUsage,
|
ResponsesResponse, ResponseUsage,
|
||||||
StreamingResponsesResponse)
|
StreamingResponsesResponse)
|
||||||
@ -1280,14 +1282,13 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
# Deal with tool call here
|
# Deal with tool call here
|
||||||
pass
|
pass
|
||||||
elif previous_item.channel == "analysis":
|
elif previous_item.channel == "analysis":
|
||||||
|
content = ResponseReasoningTextContent(
|
||||||
|
text=previous_item.content[0].text,
|
||||||
|
type="reasoning_text",
|
||||||
|
)
|
||||||
reasoning_item = ResponseReasoningItem(
|
reasoning_item = ResponseReasoningItem(
|
||||||
type="reasoning",
|
type="reasoning",
|
||||||
content=[
|
content=[content],
|
||||||
ResponseReasoningTextContent(
|
|
||||||
text=previous_item.content[0].text,
|
|
||||||
type="reasoning_text",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
status="completed",
|
status="completed",
|
||||||
id=current_item_id,
|
id=current_item_id,
|
||||||
summary=[],
|
summary=[],
|
||||||
@ -1301,6 +1302,15 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
content_index=current_content_index,
|
content_index=current_content_index,
|
||||||
text=previous_item.content[0].text,
|
text=previous_item.content[0].text,
|
||||||
))
|
))
|
||||||
|
yield _increment_sequence_number_and_return(
|
||||||
|
ResponseReasoningPartDoneEvent(
|
||||||
|
type="response.reasoning_part.done",
|
||||||
|
sequence_number=-1,
|
||||||
|
item_id=current_item_id,
|
||||||
|
output_index=current_output_index,
|
||||||
|
content_index=current_content_index,
|
||||||
|
part=content,
|
||||||
|
))
|
||||||
yield _increment_sequence_number_and_return(
|
yield _increment_sequence_number_and_return(
|
||||||
ResponseOutputItemDoneEvent(
|
ResponseOutputItemDoneEvent(
|
||||||
type="response.output_item.done",
|
type="response.output_item.done",
|
||||||
@ -1412,17 +1422,15 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
))
|
))
|
||||||
current_content_index += 1
|
current_content_index += 1
|
||||||
yield _increment_sequence_number_and_return(
|
yield _increment_sequence_number_and_return(
|
||||||
ResponseContentPartAddedEvent(
|
ResponseReasoningPartAddedEvent(
|
||||||
type="response.content_part.added",
|
type="response.reasoning_part.added",
|
||||||
sequence_number=-1,
|
sequence_number=-1,
|
||||||
output_index=current_output_index,
|
output_index=current_output_index,
|
||||||
item_id=current_item_id,
|
item_id=current_item_id,
|
||||||
content_index=current_content_index,
|
content_index=current_content_index,
|
||||||
part=ResponseOutputText(
|
part=ResponseReasoningTextContent(
|
||||||
type="output_text",
|
|
||||||
text="",
|
text="",
|
||||||
annotations=[],
|
type="reasoning_text",
|
||||||
logprobs=[],
|
|
||||||
),
|
),
|
||||||
))
|
))
|
||||||
yield _increment_sequence_number_and_return(
|
yield _increment_sequence_number_and_return(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user