diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 42bcd64ff11f..432b1eca45f3 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1440,13 +1440,6 @@ class EngineArgs: ) if self.async_scheduling: - # Async scheduling does not work with the uniprocess backend. - if self.distributed_executor_backend is None: - self.distributed_executor_backend = "mp" - logger.info( - "Defaulting to mp-based distributed executor " - "backend for async scheduling." - ) if self.pipeline_parallel_size > 1: raise ValueError( "Async scheduling is not supported with pipeline-parallel-size > 1." @@ -1503,6 +1496,15 @@ class EngineArgs: _api_process_rank=self._api_process_rank, ) + if self.async_scheduling and ( + parallel_config.distributed_executor_backend not in ("mp", "uni") + ): + raise ValueError( + "Currently, async scheduling only supports `mp` or `uni` " + "distributed executor backend, but you choose " + f"`{parallel_config.distributed_executor_backend}`." + ) + speculative_config = self.create_speculative_config( target_model_config=model_config, target_parallel_config=parallel_config,