mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-27 02:48:42 +08:00
[Misc] refactor: simplify EngineCoreClient.make_async_mp_client in AsyncLLM (#18817)
Signed-off-by: googs1025 <googs1025@gmail.com>
This commit is contained in:
parent
c3fd4d669a
commit
23027e2daf
@ -28,8 +28,7 @@ from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import Device, cdiv
|
||||
from vllm.v1.engine import EngineCoreRequest
|
||||
from vllm.v1.engine.core_client import (AsyncMPClient, DPAsyncMPClient,
|
||||
RayDPClient)
|
||||
from vllm.v1.engine.core_client import EngineCoreClient
|
||||
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
|
||||
from vllm.v1.engine.output_processor import (OutputProcessor,
|
||||
RequestOutputCollector)
|
||||
@ -121,15 +120,8 @@ class AsyncLLM(EngineClient):
|
||||
log_stats=self.log_stats)
|
||||
|
||||
# EngineCore (starts the engine in background process).
|
||||
core_client_class: type[AsyncMPClient]
|
||||
if vllm_config.parallel_config.data_parallel_size == 1:
|
||||
core_client_class = AsyncMPClient
|
||||
elif vllm_config.parallel_config.data_parallel_backend == "ray":
|
||||
core_client_class = RayDPClient
|
||||
else:
|
||||
core_client_class = DPAsyncMPClient
|
||||
|
||||
self.engine_core = core_client_class(
|
||||
self.engine_core = EngineCoreClient.make_async_mp_client(
|
||||
vllm_config=vllm_config,
|
||||
executor_class=executor_class,
|
||||
log_stats=self.log_stats,
|
||||
|
||||
@ -68,18 +68,31 @@ class EngineCoreClient(ABC):
|
||||
"is not currently supported.")
|
||||
|
||||
if multiprocess_mode and asyncio_mode:
|
||||
if vllm_config.parallel_config.data_parallel_size > 1:
|
||||
if vllm_config.parallel_config.data_parallel_backend == "ray":
|
||||
return RayDPClient(vllm_config, executor_class, log_stats)
|
||||
return DPAsyncMPClient(vllm_config, executor_class, log_stats)
|
||||
|
||||
return AsyncMPClient(vllm_config, executor_class, log_stats)
|
||||
return EngineCoreClient.make_async_mp_client(
|
||||
vllm_config, executor_class, log_stats)
|
||||
|
||||
if multiprocess_mode and not asyncio_mode:
|
||||
return SyncMPClient(vllm_config, executor_class, log_stats)
|
||||
|
||||
return InprocClient(vllm_config, executor_class, log_stats)
|
||||
|
||||
@staticmethod
def make_async_mp_client(
    vllm_config: VllmConfig,
    executor_class: type[Executor],
    log_stats: bool,
    client_addresses: Optional[dict[str, str]] = None,
    client_index: int = 0,
) -> "MPClient":
    """Construct the multiprocess client selected by the parallel config.

    The client class is chosen from ``vllm_config.parallel_config``:

    * ``data_parallel_size > 1`` and ``data_parallel_backend == "ray"``
      selects ``RayDPClient``;
    * ``data_parallel_size > 1`` with any other backend selects
      ``DPAsyncMPClient``;
    * otherwise ``AsyncMPClient`` is used.

    Every argument is forwarded positionally, unchanged, to the chosen
    class's constructor.

    Args:
        vllm_config: Engine configuration; only ``parallel_config`` is
            inspected here.
        executor_class: Executor type handed to the client.
        log_stats: Stats-logging flag handed to the client.
        client_addresses: Optional address mapping handed to the client
            (defaults to ``None``).
        client_index: Index handed to the client (defaults to ``0``).

    Returns:
        The newly constructed client instance.
    """
    parallel_config = vllm_config.parallel_config

    # Pick the class first so the constructor call is written only once.
    if parallel_config.data_parallel_size > 1:
        uses_ray = parallel_config.data_parallel_backend == "ray"
        client_cls = RayDPClient if uses_ray else DPAsyncMPClient
    else:
        client_cls = AsyncMPClient

    return client_cls(
        vllm_config,
        executor_class,
        log_stats,
        client_addresses,
        client_index,
    )
|
||||
|
||||
@abstractmethod
def shutdown(self):
    """Abstract hook with no behavior defined here.

    Marked with ``@abstractmethod``, so concrete subclasses are expected
    to supply the implementation.
    """
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user