[Misc] refactor: simplify EngineCoreClient.make_async_mp_client in AsyncLLM (#18817)

Signed-off-by: googs1025 <googs1025@gmail.com>
This commit is contained in:
CYJiang 2025-06-05 06:37:25 +08:00 committed by GitHub
parent c3fd4d669a
commit 23027e2daf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 16 deletions

View File

@ -28,8 +28,7 @@ from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device, cdiv
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.core_client import (AsyncMPClient, DPAsyncMPClient,
RayDPClient)
from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
from vllm.v1.engine.output_processor import (OutputProcessor,
RequestOutputCollector)
@ -121,15 +120,8 @@ class AsyncLLM(EngineClient):
log_stats=self.log_stats)
# EngineCore (starts the engine in background process).
core_client_class: type[AsyncMPClient]
if vllm_config.parallel_config.data_parallel_size == 1:
core_client_class = AsyncMPClient
elif vllm_config.parallel_config.data_parallel_backend == "ray":
core_client_class = RayDPClient
else:
core_client_class = DPAsyncMPClient
self.engine_core = core_client_class(
self.engine_core = EngineCoreClient.make_async_mp_client(
vllm_config=vllm_config,
executor_class=executor_class,
log_stats=self.log_stats,

View File

@ -68,18 +68,31 @@ class EngineCoreClient(ABC):
"is not currently supported.")
if multiprocess_mode and asyncio_mode:
if vllm_config.parallel_config.data_parallel_size > 1:
if vllm_config.parallel_config.data_parallel_backend == "ray":
return RayDPClient(vllm_config, executor_class, log_stats)
return DPAsyncMPClient(vllm_config, executor_class, log_stats)
return AsyncMPClient(vllm_config, executor_class, log_stats)
return EngineCoreClient.make_async_mp_client(
vllm_config, executor_class, log_stats)
if multiprocess_mode and not asyncio_mode:
return SyncMPClient(vllm_config, executor_class, log_stats)
return InprocClient(vllm_config, executor_class, log_stats)
@staticmethod
def make_async_mp_client(
vllm_config: VllmConfig,
executor_class: type[Executor],
log_stats: bool,
client_addresses: Optional[dict[str, str]] = None,
client_index: int = 0,
) -> "MPClient":
if vllm_config.parallel_config.data_parallel_size > 1:
if vllm_config.parallel_config.data_parallel_backend == "ray":
return RayDPClient(vllm_config, executor_class, log_stats,
client_addresses, client_index)
return DPAsyncMPClient(vllm_config, executor_class, log_stats,
client_addresses, client_index)
return AsyncMPClient(vllm_config, executor_class, log_stats,
client_addresses, client_index)
@abstractmethod
def shutdown(self):
...