[V1] Make AsyncLLMEngine v1-v0 opaque (#11383)

Signed-off-by: Ricky Xu <xuchen727@hotmail.com>
This commit is contained in:
Ricky Xu 2024-12-20 23:14:08 -08:00 committed by GitHub
parent 51ff216d85
commit 584f0ae40d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 9 additions and 10 deletions

View File

@@ -1256,3 +1256,10 @@ class AsyncLLMEngine(EngineClient):
self.engine.model_executor.stop_profile()
else:
self.engine.model_executor._run_workers("stop_profile")
# TODO(v1): Remove this class proxy when V1 goes default.
if envs.VLLM_USE_V1:
from vllm.v1.engine.async_llm import AsyncLLM
AsyncLLMEngine = AsyncLLM # type: ignore

View File

@@ -27,6 +27,7 @@ from typing_extensions import assert_never
import vllm.envs as envs
from vllm.config import ModelConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
from vllm.engine.multiprocessing.client import MQLLMEngineClient
from vllm.engine.multiprocessing.engine import run_mp_engine
from vllm.engine.protocol import EngineClient
@@ -66,11 +67,6 @@ from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
is_valid_ipv6_address)
from vllm.version import __version__ as VLLM_VERSION
if envs.VLLM_USE_V1:
from vllm.v1.engine.async_llm import AsyncLLMEngine # type: ignore
else:
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
TIMEOUT_KEEP_ALIVE = 5 # seconds
prometheus_multiproc_dir: tempfile.TemporaryDirectory

View File

@@ -98,7 +98,7 @@ class AsyncLLM(EngineClient):
start_engine_loop: bool = True,
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
) -> "AsyncLLMEngine":
) -> "AsyncLLM":
"""Create an AsyncLLM from the EngineArgs."""
# Create the engine configs.
@@ -386,7 +386,3 @@ class AsyncLLM(EngineClient):
@property
def dead_error(self) -> BaseException:
return Exception() # TODO: implement
# Retain V0 name for backwards compatibility.
AsyncLLMEngine = AsyncLLM