[V1] Make AsyncLLMEngine v1-v0 opaque (#11383)

Signed-off-by: Ricky Xu <xuchen727@hotmail.com>
2026-06-08 21:15:46 +08:00 · 2024-12-20 23:14:08 -08:00 · 2024-12-20 23:14:08 -08:00 · 584f0ae40d
commit 584f0ae40d
parent 51ff216d85
3 changed files with 9 additions and 10 deletions
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -1256,3 +1256,10 @@ class AsyncLLMEngine(EngineClient):
            self.engine.model_executor.stop_profile()
        else:
            self.engine.model_executor._run_workers("stop_profile")
 # TODO(v1): Remove this class proxy when V1 goes default.
 if envs.VLLM_USE_V1:
    from vllm.v1.engine.async_llm import AsyncLLM
    AsyncLLMEngine = AsyncLLM  # type: ignore
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -27,6 +27,7 @@ from typing_extensions import assert_never
 import vllm.envs as envs
 from vllm.config import ModelConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine  # type: ignore
 from vllm.engine.multiprocessing.client import MQLLMEngineClient
 from vllm.engine.multiprocessing.engine import run_mp_engine
 from vllm.engine.protocol import EngineClient
@@ -66,11 +67,6 @@ from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
                        is_valid_ipv6_address)
 from vllm.version import __version__ as VLLM_VERSION
 if envs.VLLM_USE_V1:
    from vllm.v1.engine.async_llm import AsyncLLMEngine  # type: ignore
 else:
    from vllm.engine.async_llm_engine import AsyncLLMEngine  # type: ignore
 TIMEOUT_KEEP_ALIVE = 5  # seconds
 prometheus_multiproc_dir: tempfile.TemporaryDirectory
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -98,7 +98,7 @@ class AsyncLLM(EngineClient):
        start_engine_loop: bool = True,
        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
        stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
-    ) -> "AsyncLLMEngine":
+    ) -> "AsyncLLM":
        """Create an AsyncLLM from the EngineArgs."""
        # Create the engine configs.
@@ -386,7 +386,3 @@ class AsyncLLM(EngineClient):
    @property
    def dead_error(self) -> BaseException:
        return Exception()  # TODO: implement
 # Retain V0 name for backwards compatibility.
 AsyncLLMEngine = AsyncLLM