From 24cbbe477806c5e5fa68e9086eda253d1feab34f Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 22 Mar 2025 18:50:48 -0400 Subject: [PATCH] updated Signed-off-by: Robert Shaw --- vllm/entrypoints/openai/api_server.py | 4 +++- vllm/entrypoints/openai/protocol.py | 16 ---------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 41c4dd32442ca..f9b1d69a31d8c 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -137,8 +137,9 @@ async def build_async_engine_client( # Context manager to handle engine_client lifecycle # Ensures everything is shutdown and cleaned up on error/exit engine_args = AsyncEngineArgs.from_cli_args(args) + async with build_async_engine_client_from_engine_args( - engine_args, args.disable_frontend_multiprocessing) as engine: + engine_args, args.disable_frontend_multiprocessing) as engine: yield engine @@ -159,6 +160,7 @@ async def build_async_engine_client_from_engine_args( usage_context = UsageContext.OPENAI_API_SERVER vllm_config = engine_args.create_engine_config(usage_context=usage_context) + # V1 AsyncLLM. if envs.VLLM_USE_V1: if disable_frontend_multiprocessing: logger.warning( diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 23525d80995b6..a96ca1f757008 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -1649,19 +1649,3 @@ class TranscriptionResponseVerbose(OpenAIBaseModel): words: Optional[list[TranscriptionWord]] = None """Extracted words and their corresponding timestamps.""" - - -class ZmqMsgRequest(BaseModel): - request_id: str - type: str - body: Union[CompletionRequest] - - -class ZmqMsgResponse(BaseModel): - request_id: str - type: str - stop: bool = True - body_type: Literal["str", "response"] = "str" - body: str - - model_config = ConfigDict(arbitrary_types_allowed=True)