diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index 47e7ffded797..b99be1e5dfbc 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -586,6 +586,15 @@ class VllmConfig:
         else:
             self.scheduler_config.async_scheduling = True
 
+        if (
+            self.scheduler_config.async_scheduling
+            and not self.parallel_config.disable_nccl_for_dp_synchronization
+        ):
+            logger.info(
+                "Disabling NCCL for DP synchronization when using async scheduling."
+            )
+            self.parallel_config.disable_nccl_for_dp_synchronization = True
+
         from vllm.platforms import current_platform
 
         if (
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index aad0719548d1..ceac5407af6e 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1602,12 +1602,6 @@ class EngineArgs:
             model_config.skip_tokenizer_init = True
             logger.info("Skipping tokenizer initialization for tokens-only mode.")
 
-        if self.async_scheduling and not self.disable_nccl_for_dp_synchronization:
-            logger.info(
-                "Disabling NCCL for DP synchronization when using async scheduling."
-            )
-            self.disable_nccl_for_dp_synchronization = True
-
         parallel_config = ParallelConfig(
             pipeline_parallel_size=self.pipeline_parallel_size,
             tensor_parallel_size=self.tensor_parallel_size,