mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-08 21:15:46 +08:00
[V1] Make AsyncLLMEngine v1-v0 opaque (#11383)
Signed-off-by: Ricky Xu <xuchen727@hotmail.com>
This commit is contained in:
parent
51ff216d85
commit
584f0ae40d
@@ -1256,3 +1256,10 @@ class AsyncLLMEngine(EngineClient):
|
|||||||
self.engine.model_executor.stop_profile()
|
self.engine.model_executor.stop_profile()
|
||||||
else:
|
else:
|
||||||
self.engine.model_executor._run_workers("stop_profile")
|
self.engine.model_executor._run_workers("stop_profile")
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(v1): Remove this class proxy when V1 goes default.
|
||||||
|
if envs.VLLM_USE_V1:
|
||||||
|
from vllm.v1.engine.async_llm import AsyncLLM
|
||||||
|
|
||||||
|
AsyncLLMEngine = AsyncLLM # type: ignore
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ from typing_extensions import assert_never
|
|||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.config import ModelConfig
|
from vllm.config import ModelConfig
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||||
|
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
|
||||||
from vllm.engine.multiprocessing.client import MQLLMEngineClient
|
from vllm.engine.multiprocessing.client import MQLLMEngineClient
|
||||||
from vllm.engine.multiprocessing.engine import run_mp_engine
|
from vllm.engine.multiprocessing.engine import run_mp_engine
|
||||||
from vllm.engine.protocol import EngineClient
|
from vllm.engine.protocol import EngineClient
|
||||||
@@ -66,11 +67,6 @@ from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
|
|||||||
is_valid_ipv6_address)
|
is_valid_ipv6_address)
|
||||||
from vllm.version import __version__ as VLLM_VERSION
|
from vllm.version import __version__ as VLLM_VERSION
|
||||||
|
|
||||||
if envs.VLLM_USE_V1:
|
|
||||||
from vllm.v1.engine.async_llm import AsyncLLMEngine # type: ignore
|
|
||||||
else:
|
|
||||||
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
|
|
||||||
|
|
||||||
TIMEOUT_KEEP_ALIVE = 5 # seconds
|
TIMEOUT_KEEP_ALIVE = 5 # seconds
|
||||||
|
|
||||||
prometheus_multiproc_dir: tempfile.TemporaryDirectory
|
prometheus_multiproc_dir: tempfile.TemporaryDirectory
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ class AsyncLLM(EngineClient):
|
|||||||
start_engine_loop: bool = True,
|
start_engine_loop: bool = True,
|
||||||
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
||||||
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
|
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
|
||||||
) -> "AsyncLLMEngine":
|
) -> "AsyncLLM":
|
||||||
"""Create an AsyncLLM from the EngineArgs."""
|
"""Create an AsyncLLM from the EngineArgs."""
|
||||||
|
|
||||||
# Create the engine configs.
|
# Create the engine configs.
|
||||||
@@ -386,7 +386,3 @@ class AsyncLLM(EngineClient):
|
|||||||
@property
|
@property
|
||||||
def dead_error(self) -> BaseException:
|
def dead_error(self) -> BaseException:
|
||||||
return Exception() # TODO: implement
|
return Exception() # TODO: implement
|
||||||
|
|
||||||
|
|
||||||
# Retain V0 name for backwards compatibility.
|
|
||||||
AsyncLLMEngine = AsyncLLM
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user