mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 20:04:27 +08:00
[Perf] use cpu all reduce to avoid sync when async_scheduling & dp > 1 (#29311)
Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
This commit is contained in:
parent
888152bf87
commit
f242cfcdd5
@@ -1570,6 +1570,12 @@ class EngineArgs:
|
||||
model_config.skip_tokenizer_init = True
|
||||
logger.info("Skipping tokenizer initialization for tokens-only mode.")
|
||||
|
||||
if self.async_scheduling and not self.disable_nccl_for_dp_synchronization:
|
||||
logger.info(
|
||||
"Disabling NCCL for DP synchronization when using async scheduling."
|
||||
)
|
||||
self.disable_nccl_for_dp_synchronization = True
|
||||
|
||||
# Forward the deprecated CLI args to the EPLB config.
|
||||
if self.num_redundant_experts is not None:
|
||||
self.eplb_config.num_redundant_experts = self.num_redundant_experts
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user