mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-26 03:16:57 +08:00
[V1] Make AsyncLLMEngine v1-v0 opaque (#11383)
Signed-off-by: Ricky Xu <xuchen727@hotmail.com>
This commit is contained in:
parent
51ff216d85
commit
584f0ae40d
@ -1256,3 +1256,10 @@ class AsyncLLMEngine(EngineClient):
|
||||
self.engine.model_executor.stop_profile()
|
||||
else:
|
||||
self.engine.model_executor._run_workers("stop_profile")
|
||||
|
||||
|
||||
# TODO(v1): Remove this class proxy when V1 goes default.
|
||||
if envs.VLLM_USE_V1:
|
||||
from vllm.v1.engine.async_llm import AsyncLLM
|
||||
|
||||
AsyncLLMEngine = AsyncLLM # type: ignore
|
||||
|
||||
@ -27,6 +27,7 @@ from typing_extensions import assert_never
|
||||
import vllm.envs as envs
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
|
||||
from vllm.engine.multiprocessing.client import MQLLMEngineClient
|
||||
from vllm.engine.multiprocessing.engine import run_mp_engine
|
||||
from vllm.engine.protocol import EngineClient
|
||||
@ -66,11 +67,6 @@ from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
|
||||
is_valid_ipv6_address)
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
if envs.VLLM_USE_V1:
|
||||
from vllm.v1.engine.async_llm import AsyncLLMEngine # type: ignore
|
||||
else:
|
||||
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
|
||||
|
||||
TIMEOUT_KEEP_ALIVE = 5 # seconds
|
||||
|
||||
prometheus_multiproc_dir: tempfile.TemporaryDirectory
|
||||
|
||||
@ -98,7 +98,7 @@ class AsyncLLM(EngineClient):
|
||||
start_engine_loop: bool = True,
|
||||
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
||||
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
|
||||
) -> "AsyncLLMEngine":
|
||||
) -> "AsyncLLM":
|
||||
"""Create an AsyncLLM from the EngineArgs."""
|
||||
|
||||
# Create the engine configs.
|
||||
@ -386,7 +386,3 @@ class AsyncLLM(EngineClient):
|
||||
@property
|
||||
def dead_error(self) -> BaseException:
|
||||
return Exception() # TODO: implement
|
||||
|
||||
|
||||
# Retain V0 name for backwards compatibility.
|
||||
AsyncLLMEngine = AsyncLLM
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user