Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
This commit is contained in:
Robert Shaw 2025-03-22 18:50:48 -04:00
parent 2fec6e0b5c
commit 24cbbe4778
2 changed files with 3 additions and 17 deletions

View File

@@ -137,8 +137,9 @@ async def build_async_engine_client(
# Context manager to handle engine_client lifecycle
# Ensures everything is shut down and cleaned up on error/exit
engine_args = AsyncEngineArgs.from_cli_args(args)
async with build_async_engine_client_from_engine_args(
engine_args, args.disable_frontend_multiprocessing) as engine:
engine_args, args.disable_frontend_multiprocessing) as engine:
yield engine
@@ -159,6 +160,7 @@ async def build_async_engine_client_from_engine_args(
usage_context = UsageContext.OPENAI_API_SERVER
vllm_config = engine_args.create_engine_config(usage_context=usage_context)
# V1 AsyncLLM.
if envs.VLLM_USE_V1:
if disable_frontend_multiprocessing:
logger.warning(

View File

@@ -1649,19 +1649,3 @@ class TranscriptionResponseVerbose(OpenAIBaseModel):
words: Optional[list[TranscriptionWord]] = None
"""Extracted words and their corresponding timestamps."""
class ZmqMsgRequest(BaseModel):
    """Envelope for a request message sent over the ZMQ frontend socket."""

    # Correlates this request with its matching ZmqMsgResponse.
    request_id: str
    # Message discriminator — allowed values are not visible here; TODO confirm.
    type: str
    # Was `Union[CompletionRequest]`: a single-member Union is identical to the
    # bare type, so spell it plainly. (If more request types were intended,
    # they should be added as additional Union members instead.)
    body: CompletionRequest
class ZmqMsgResponse(BaseModel):
    """Envelope for a response message sent back over the ZMQ frontend socket."""

    # Echoes the request_id of the ZmqMsgRequest being answered.
    request_id: str
    # Message discriminator — allowed values are not visible here; TODO confirm.
    type: str
    # True when this is the final message for the request (defaults to a
    # single-shot response); presumably False for intermediate stream chunks —
    # verify against the sender.
    stop: bool = True
    # Tells the receiver how to interpret `body`: a plain string or a
    # serialized response object.
    body_type: Literal["str", "response"] = "str"
    body: str
    # Permit non-pydantic field types on this model (pydantic v2 config).
    model_config = ConfigDict(arbitrary_types_allowed=True)