[BugFix][Core] Fix error when enabling async scheduling in a multi-node environment (#25887)

Signed-off-by: Lehua Ding <lehuading@tencent.com>
Signed-off-by: Lehua Ding <lehuading@qq.com>
Co-authored-by: Benjamin Chislett <chislett.ben@gmail.com>
This commit is contained in:
Lehua Ding 2025-10-18 06:16:18 +08:00 committed by GitHub
parent f50cc221ea
commit 6367bde739
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1440,13 +1440,6 @@ class EngineArgs:
)
if self.async_scheduling:
# Async scheduling does not work with the uniprocess backend.
if self.distributed_executor_backend is None:
self.distributed_executor_backend = "mp"
logger.info(
"Defaulting to mp-based distributed executor "
"backend for async scheduling."
)
if self.pipeline_parallel_size > 1:
raise ValueError(
"Async scheduling is not supported with pipeline-parallel-size > 1."
@ -1503,6 +1496,15 @@ class EngineArgs:
_api_process_rank=self._api_process_rank,
)
if self.async_scheduling and (
parallel_config.distributed_executor_backend not in ("mp", "uni")
):
raise ValueError(
"Currently, async scheduling only supports `mp` or `uni` "
"distributed executor backend, but you choose "
f"`{parallel_config.distributed_executor_backend}`."
)
speculative_config = self.create_speculative_config(
target_model_config=model_config,
target_parallel_config=parallel_config,