mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 00:25:23 +08:00
[gpt-oss] use vLLM instead of openai types for streaming (#25186)
Signed-off-by: Andrew Xia <axia@meta.com>
Signed-off-by: Andrew Xia <axia@fb.com>
Co-authored-by: Andrew Xia <axia@fb.com>
This commit is contained in:
parent
2ce26b9b5d
commit
5db1870bb9
@@ -379,6 +379,14 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
|
|||||||
if event.type == "response.created":
|
if event.type == "response.created":
|
||||||
resp_id = event.response.id
|
resp_id = event.response.id
|
||||||
|
|
||||||
|
# test vllm custom types are in the response
|
||||||
|
if event.type in [
|
||||||
|
"response.completed", "response.in_progress",
|
||||||
|
"response.created"
|
||||||
|
]:
|
||||||
|
assert 'input_messages' in event.response.model_extra
|
||||||
|
assert 'output_messages' in event.response.model_extra
|
||||||
|
|
||||||
if current_event_mode != event.type:
|
if current_event_mode != event.type:
|
||||||
current_event_mode = event.type
|
current_event_mode = event.type
|
||||||
print(f"\n[{event.type}] ", end="", flush=True)
|
print(f"\n[{event.type}] ", end="", flush=True)
|
||||||
|
|||||||
@@ -17,20 +17,32 @@ from openai.types.chat.chat_completion_audio import (
|
|||||||
ChatCompletionAudio as OpenAIChatCompletionAudio)
|
ChatCompletionAudio as OpenAIChatCompletionAudio)
|
||||||
from openai.types.chat.chat_completion_message import (
|
from openai.types.chat.chat_completion_message import (
|
||||||
Annotation as OpenAIAnnotation)
|
Annotation as OpenAIAnnotation)
|
||||||
# yapf: enable
|
|
||||||
from openai.types.responses import (
|
from openai.types.responses import (
|
||||||
ResponseCodeInterpreterCallCodeDeltaEvent,
|
ResponseCodeInterpreterCallCodeDeltaEvent,
|
||||||
ResponseCodeInterpreterCallCodeDoneEvent,
|
ResponseCodeInterpreterCallCodeDoneEvent,
|
||||||
ResponseCodeInterpreterCallCompletedEvent,
|
ResponseCodeInterpreterCallCompletedEvent,
|
||||||
ResponseCodeInterpreterCallInProgressEvent,
|
ResponseCodeInterpreterCallInProgressEvent,
|
||||||
ResponseCodeInterpreterCallInterpretingEvent, ResponseCompletedEvent,
|
ResponseCodeInterpreterCallInterpretingEvent)
|
||||||
ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
|
from openai.types.responses import (
|
||||||
ResponseCreatedEvent, ResponseFunctionToolCall, ResponseInProgressEvent,
|
ResponseCompletedEvent as OpenAIResponseCompletedEvent)
|
||||||
ResponseInputItemParam, ResponseOutputItem, ResponseOutputItemAddedEvent,
|
from openai.types.responses import (ResponseContentPartAddedEvent,
|
||||||
ResponseOutputItemDoneEvent, ResponsePrompt, ResponseReasoningItem,
|
ResponseContentPartDoneEvent)
|
||||||
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
|
from openai.types.responses import (
|
||||||
ResponseStatus, ResponseWebSearchCallCompletedEvent,
|
ResponseCreatedEvent as OpenAIResponseCreatedEvent)
|
||||||
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
|
from openai.types.responses import ResponseFunctionToolCall
|
||||||
|
from openai.types.responses import (
|
||||||
|
ResponseInProgressEvent as OpenAIResponseInProgressEvent)
|
||||||
|
from openai.types.responses import (ResponseInputItemParam, ResponseOutputItem,
|
||||||
|
ResponseOutputItemAddedEvent,
|
||||||
|
ResponseOutputItemDoneEvent,
|
||||||
|
ResponsePrompt, ResponseReasoningItem,
|
||||||
|
ResponseReasoningTextDeltaEvent,
|
||||||
|
ResponseReasoningTextDoneEvent,
|
||||||
|
ResponseStatus,
|
||||||
|
ResponseWebSearchCallCompletedEvent,
|
||||||
|
ResponseWebSearchCallInProgressEvent,
|
||||||
|
ResponseWebSearchCallSearchingEvent)
|
||||||
|
# yapf: enable
|
||||||
from openai.types.responses.response_reasoning_item import (
|
from openai.types.responses.response_reasoning_item import (
|
||||||
Content as ResponseReasoningTextContent)
|
Content as ResponseReasoningTextContent)
|
||||||
|
|
||||||
@@ -2077,10 +2089,24 @@ class ResponseReasoningPartAddedEvent(OpenAIBaseModel):
|
|||||||
"""The type of the event. Always `response.reasoning_part.added`."""
|
"""The type of the event. Always `response.reasoning_part.added`."""
|
||||||
|
|
||||||
|
|
||||||
|
# vLLM Streaming Events
|
||||||
|
# Note: we override the response type with the vLLM ResponsesResponse type
|
||||||
|
class ResponseCompletedEvent(OpenAIResponseCompletedEvent):
|
||||||
|
response: ResponsesResponse # type: ignore[override]
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseCreatedEvent(OpenAIResponseCreatedEvent):
|
||||||
|
response: ResponsesResponse # type: ignore[override]
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseInProgressEvent(OpenAIResponseInProgressEvent):
|
||||||
|
response: ResponsesResponse # type: ignore[override]
|
||||||
|
|
||||||
|
|
||||||
StreamingResponsesResponse: TypeAlias = Union[
|
StreamingResponsesResponse: TypeAlias = Union[
|
||||||
ResponseCreatedEvent,
|
"ResponseCreatedEvent",
|
||||||
ResponseInProgressEvent,
|
"ResponseInProgressEvent",
|
||||||
ResponseCompletedEvent,
|
"ResponseCompletedEvent",
|
||||||
ResponseOutputItemAddedEvent,
|
ResponseOutputItemAddedEvent,
|
||||||
ResponseOutputItemDoneEvent,
|
ResponseOutputItemDoneEvent,
|
||||||
ResponseContentPartAddedEvent,
|
ResponseContentPartAddedEvent,
|
||||||
|
|||||||
@@ -22,16 +22,16 @@ from openai.types.responses import (
|
|||||||
ResponseCodeInterpreterCallCompletedEvent,
|
ResponseCodeInterpreterCallCompletedEvent,
|
||||||
ResponseCodeInterpreterCallInProgressEvent,
|
ResponseCodeInterpreterCallInProgressEvent,
|
||||||
ResponseCodeInterpreterCallInterpretingEvent,
|
ResponseCodeInterpreterCallInterpretingEvent,
|
||||||
ResponseCodeInterpreterToolCallParam, ResponseCompletedEvent,
|
ResponseCodeInterpreterToolCallParam, ResponseContentPartAddedEvent,
|
||||||
ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
|
ResponseContentPartDoneEvent, ResponseFunctionToolCall,
|
||||||
ResponseCreatedEvent, ResponseFunctionToolCall, ResponseFunctionWebSearch,
|
ResponseFunctionWebSearch, ResponseOutputItem,
|
||||||
ResponseInProgressEvent, ResponseOutputItem, ResponseOutputItemAddedEvent,
|
ResponseOutputItemAddedEvent, ResponseOutputItemDoneEvent,
|
||||||
ResponseOutputItemDoneEvent, ResponseOutputMessage, ResponseOutputText,
|
ResponseOutputMessage, ResponseOutputText, ResponseReasoningItem,
|
||||||
ResponseReasoningItem, ResponseReasoningTextDeltaEvent,
|
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
|
||||||
ResponseReasoningTextDoneEvent, ResponseStatus, ResponseTextDeltaEvent,
|
ResponseStatus, ResponseTextDeltaEvent, ResponseTextDoneEvent,
|
||||||
ResponseTextDoneEvent, ResponseWebSearchCallCompletedEvent,
|
ResponseWebSearchCallCompletedEvent, ResponseWebSearchCallInProgressEvent,
|
||||||
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent,
|
ResponseWebSearchCallSearchingEvent, response_function_web_search,
|
||||||
response_function_web_search, response_text_delta_event)
|
response_text_delta_event)
|
||||||
from openai.types.responses.response_output_text import (Logprob,
|
from openai.types.responses.response_output_text import (Logprob,
|
||||||
LogprobTopLogprob)
|
LogprobTopLogprob)
|
||||||
# yapf: enable
|
# yapf: enable
|
||||||
@@ -58,6 +58,9 @@ from vllm.entrypoints.openai.protocol import (DeltaMessage, ErrorResponse,
|
|||||||
InputTokensDetails,
|
InputTokensDetails,
|
||||||
OutputTokensDetails,
|
OutputTokensDetails,
|
||||||
RequestResponseMetadata,
|
RequestResponseMetadata,
|
||||||
|
ResponseCompletedEvent,
|
||||||
|
ResponseCreatedEvent,
|
||||||
|
ResponseInProgressEvent,
|
||||||
ResponseReasoningPartAddedEvent,
|
ResponseReasoningPartAddedEvent,
|
||||||
ResponseReasoningPartDoneEvent,
|
ResponseReasoningPartDoneEvent,
|
||||||
ResponsesRequest,
|
ResponsesRequest,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user