[Perf] use cpu all reduce to avoid sync when async_scheduling & dp > 1 (#29311)

Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
This commit is contained in:
zhrrr 2025-11-25 15:31:07 +08:00 committed by GitHub
parent 888152bf87
commit f242cfcdd5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1570,6 +1570,12 @@ class EngineArgs:
model_config.skip_tokenizer_init = True
logger.info("Skipping tokenizer initialization for tokens-only mode.")
if self.async_scheduling and not self.disable_nccl_for_dp_synchronization:
logger.info(
"Disabling NCCL for DP synchronization when using async scheduling."
)
self.disable_nccl_for_dp_synchronization = True
# Forward the deprecated CLI args to the EPLB config.
if self.num_redundant_experts is not None:
self.eplb_config.num_redundant_experts = self.num_redundant_experts