mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 10:46:05 +08:00
[gpt-oss] use vLLM instead of openai types for streaming (#25186)
Signed-off-by: Andrew Xia <axia@meta.com> Signed-off-by: Andrew Xia <axia@fb.com> Co-authored-by: Andrew Xia <axia@fb.com>
This commit is contained in:
parent
2ce26b9b5d
commit
5db1870bb9
@ -379,6 +379,14 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
|
||||
if event.type == "response.created":
|
||||
resp_id = event.response.id
|
||||
|
||||
# test vllm custom types are in the response
|
||||
if event.type in [
|
||||
"response.completed", "response.in_progress",
|
||||
"response.created"
|
||||
]:
|
||||
assert 'input_messages' in event.response.model_extra
|
||||
assert 'output_messages' in event.response.model_extra
|
||||
|
||||
if current_event_mode != event.type:
|
||||
current_event_mode = event.type
|
||||
print(f"\n[{event.type}] ", end="", flush=True)
|
||||
|
||||
@ -17,20 +17,32 @@ from openai.types.chat.chat_completion_audio import (
|
||||
ChatCompletionAudio as OpenAIChatCompletionAudio)
|
||||
from openai.types.chat.chat_completion_message import (
|
||||
Annotation as OpenAIAnnotation)
|
||||
# yapf: enable
|
||||
from openai.types.responses import (
|
||||
ResponseCodeInterpreterCallCodeDeltaEvent,
|
||||
ResponseCodeInterpreterCallCodeDoneEvent,
|
||||
ResponseCodeInterpreterCallCompletedEvent,
|
||||
ResponseCodeInterpreterCallInProgressEvent,
|
||||
ResponseCodeInterpreterCallInterpretingEvent, ResponseCompletedEvent,
|
||||
ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
|
||||
ResponseCreatedEvent, ResponseFunctionToolCall, ResponseInProgressEvent,
|
||||
ResponseInputItemParam, ResponseOutputItem, ResponseOutputItemAddedEvent,
|
||||
ResponseOutputItemDoneEvent, ResponsePrompt, ResponseReasoningItem,
|
||||
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
|
||||
ResponseStatus, ResponseWebSearchCallCompletedEvent,
|
||||
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
|
||||
ResponseCodeInterpreterCallInterpretingEvent)
|
||||
from openai.types.responses import (
|
||||
ResponseCompletedEvent as OpenAIResponseCompletedEvent)
|
||||
from openai.types.responses import (ResponseContentPartAddedEvent,
|
||||
ResponseContentPartDoneEvent)
|
||||
from openai.types.responses import (
|
||||
ResponseCreatedEvent as OpenAIResponseCreatedEvent)
|
||||
from openai.types.responses import ResponseFunctionToolCall
|
||||
from openai.types.responses import (
|
||||
ResponseInProgressEvent as OpenAIResponseInProgressEvent)
|
||||
from openai.types.responses import (ResponseInputItemParam, ResponseOutputItem,
|
||||
ResponseOutputItemAddedEvent,
|
||||
ResponseOutputItemDoneEvent,
|
||||
ResponsePrompt, ResponseReasoningItem,
|
||||
ResponseReasoningTextDeltaEvent,
|
||||
ResponseReasoningTextDoneEvent,
|
||||
ResponseStatus,
|
||||
ResponseWebSearchCallCompletedEvent,
|
||||
ResponseWebSearchCallInProgressEvent,
|
||||
ResponseWebSearchCallSearchingEvent)
|
||||
# yapf: enable
|
||||
from openai.types.responses.response_reasoning_item import (
|
||||
Content as ResponseReasoningTextContent)
|
||||
|
||||
@ -2077,10 +2089,24 @@ class ResponseReasoningPartAddedEvent(OpenAIBaseModel):
|
||||
"""The type of the event. Always `response.reasoning_part.added`."""
|
||||
|
||||
|
||||
# vLLM Streaming Events
|
||||
# Note: we override the response type with the vLLM ResponsesResponse type
|
||||
class ResponseCompletedEvent(OpenAIResponseCompletedEvent):
|
||||
response: ResponsesResponse # type: ignore[override]
|
||||
|
||||
|
||||
class ResponseCreatedEvent(OpenAIResponseCreatedEvent):
|
||||
response: ResponsesResponse # type: ignore[override]
|
||||
|
||||
|
||||
class ResponseInProgressEvent(OpenAIResponseInProgressEvent):
|
||||
response: ResponsesResponse # type: ignore[override]
|
||||
|
||||
|
||||
StreamingResponsesResponse: TypeAlias = Union[
|
||||
ResponseCreatedEvent,
|
||||
ResponseInProgressEvent,
|
||||
ResponseCompletedEvent,
|
||||
"ResponseCreatedEvent",
|
||||
"ResponseInProgressEvent",
|
||||
"ResponseCompletedEvent",
|
||||
ResponseOutputItemAddedEvent,
|
||||
ResponseOutputItemDoneEvent,
|
||||
ResponseContentPartAddedEvent,
|
||||
|
||||
@ -22,16 +22,16 @@ from openai.types.responses import (
|
||||
ResponseCodeInterpreterCallCompletedEvent,
|
||||
ResponseCodeInterpreterCallInProgressEvent,
|
||||
ResponseCodeInterpreterCallInterpretingEvent,
|
||||
ResponseCodeInterpreterToolCallParam, ResponseCompletedEvent,
|
||||
ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
|
||||
ResponseCreatedEvent, ResponseFunctionToolCall, ResponseFunctionWebSearch,
|
||||
ResponseInProgressEvent, ResponseOutputItem, ResponseOutputItemAddedEvent,
|
||||
ResponseOutputItemDoneEvent, ResponseOutputMessage, ResponseOutputText,
|
||||
ResponseReasoningItem, ResponseReasoningTextDeltaEvent,
|
||||
ResponseReasoningTextDoneEvent, ResponseStatus, ResponseTextDeltaEvent,
|
||||
ResponseTextDoneEvent, ResponseWebSearchCallCompletedEvent,
|
||||
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent,
|
||||
response_function_web_search, response_text_delta_event)
|
||||
ResponseCodeInterpreterToolCallParam, ResponseContentPartAddedEvent,
|
||||
ResponseContentPartDoneEvent, ResponseFunctionToolCall,
|
||||
ResponseFunctionWebSearch, ResponseOutputItem,
|
||||
ResponseOutputItemAddedEvent, ResponseOutputItemDoneEvent,
|
||||
ResponseOutputMessage, ResponseOutputText, ResponseReasoningItem,
|
||||
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
|
||||
ResponseStatus, ResponseTextDeltaEvent, ResponseTextDoneEvent,
|
||||
ResponseWebSearchCallCompletedEvent, ResponseWebSearchCallInProgressEvent,
|
||||
ResponseWebSearchCallSearchingEvent, response_function_web_search,
|
||||
response_text_delta_event)
|
||||
from openai.types.responses.response_output_text import (Logprob,
|
||||
LogprobTopLogprob)
|
||||
# yapf: enable
|
||||
@ -58,6 +58,9 @@ from vllm.entrypoints.openai.protocol import (DeltaMessage, ErrorResponse,
|
||||
InputTokensDetails,
|
||||
OutputTokensDetails,
|
||||
RequestResponseMetadata,
|
||||
ResponseCompletedEvent,
|
||||
ResponseCreatedEvent,
|
||||
ResponseInProgressEvent,
|
||||
ResponseReasoningPartAddedEvent,
|
||||
ResponseReasoningPartDoneEvent,
|
||||
ResponsesRequest,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user