Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
This commit is contained in:
Robert Shaw 2025-03-22 18:50:48 -04:00
parent 2fec6e0b5c
commit 24cbbe4778
2 changed files with 3 additions and 17 deletions

View File

@@ -137,8 +137,9 @@ async def build_async_engine_client(
# Context manager to handle engine_client lifecycle
# Ensures everything is shut down and cleaned up on error/exit
engine_args = AsyncEngineArgs.from_cli_args(args)
async with build_async_engine_client_from_engine_args(
engine_args, args.disable_frontend_multiprocessing) as engine:
engine_args, args.disable_frontend_multiprocessing) as engine:
yield engine
@@ -159,6 +160,7 @@ async def build_async_engine_client_from_engine_args(
usage_context = UsageContext.OPENAI_API_SERVER
vllm_config = engine_args.create_engine_config(usage_context=usage_context)
# V1 AsyncLLM.
if envs.VLLM_USE_V1:
if disable_frontend_multiprocessing:
logger.warning(

View File

@@ -1649,19 +1649,3 @@ class TranscriptionResponseVerbose(OpenAIBaseModel):
words: Optional[list[TranscriptionWord]] = None
"""Extracted words and their corresponding timestamps."""
class ZmqMsgRequest(BaseModel):
    """Envelope for a request message sent over the ZMQ frontend socket."""

    # Correlates this request with its matching ZmqMsgResponse.
    request_id: str
    # Message discriminator — allowed values are not visible here; TODO confirm.
    type: str
    # Was `Union[CompletionRequest]`: a single-member Union is identical to the
    # bare type, so spell it plainly. (If more request types were intended,
    # they should be added as additional Union members instead.)
    body: CompletionRequest
class ZmqMsgResponse(BaseModel):
    """Envelope for a response message sent back over the ZMQ frontend socket."""

    # Echoes the request_id of the ZmqMsgRequest being answered.
    request_id: str
    # Message discriminator — allowed values are not visible here; TODO confirm.
    type: str
    # True when this is the final message for the request (defaults to a
    # single-shot response); presumably False for intermediate stream chunks —
    # verify against the sender.
    stop: bool = True
    # Tells the receiver how to interpret `body`: a plain string or a
    # serialized response object.
    body_type: Literal["str", "response"] = "str"
    body: str
    # Permit non-pydantic field types on this model (pydantic v2 config).
    model_config = ConfigDict(arbitrary_types_allowed=True)