From bf4a901af91e431a4cdb51ed4557b31bf89c0e5d Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Sat, 6 Dec 2025 04:53:52 +0000
Subject: [PATCH] Better error when world size is larger than node and `distributed_executor_backend` is not set (#30140)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/config/parallel.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py
index 3a768bcd4f2ce..0327832c4fb8c 100644
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -184,13 +184,14 @@ class ParallelConfig:
     distributed_executor_backend: (
         str | DistributedExecutorBackend | type[Executor] | None
     ) = None
-    """Backend to use for distributed model
-    workers, either "ray" or "mp" (multiprocessing). If the product
-    of pipeline_parallel_size and tensor_parallel_size is less than
-    or equal to the number of GPUs available, "mp" will be used to
-    keep processing on a single host. Otherwise, this will default
-    to "ray" if Ray is installed and fail otherwise. Note that tpu
-    only support Ray for distributed inference."""
+    """Backend to use for distributed model workers, either "ray" or "mp"
+    (multiprocessing). If the product of pipeline_parallel_size and tensor_parallel_size
+    is less than or equal to the number of GPUs available, "mp" will be used to
+    keep processing on a single host. Otherwise, an error will be raised. To use "mp"
+    you must also set nnodes, and to use "ray" you must manually set
+    distributed_executor_backend to "ray".
+
+    Note that tpu only support Ray for distributed inference."""
 
     worker_cls: str = "auto"
     """The full name of the worker class to use. If "auto", the worker class
@@ -566,8 +567,11 @@ class ParallelConfig:
         ):
             gpu_count = cuda_device_count_stateless()
             raise ValueError(
-                f"Tensor parallel size ({self.world_size}) cannot be "
-                f"larger than the number of available GPUs ({gpu_count})."
+                f"World size ({self.world_size}) is larger than the number of "
+                f"available GPUs ({gpu_count}) in this node. If this is "
+                "intentional and you are using:\n"
+                "- ray, set '--distributed-executor-backend ray'.\n"
+                "- multiprocessing, set '--nnodes' appropriately."
             )
         elif self.data_parallel_backend == "ray":
            logger.info(
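
A minimal usage sketch (not part of the patch) of the configuration the new ValueError points at: opting in to Ray explicitly when the world size (pipeline_parallel_size * tensor_parallel_size) spans more than one node. The model name and parallel sizes below are placeholders chosen for illustration, not values taken from the patch.

    # Sketch only: explicitly select the Ray backend so the per-node GPU check
    # in ParallelConfig accepts a world size larger than one node can hold.
    from vllm import LLM

    llm = LLM(
        model="facebook/opt-125m",           # placeholder model name
        tensor_parallel_size=8,
        pipeline_parallel_size=2,            # 16 workers total across nodes
        distributed_executor_backend="ray",  # what the new error message asks for
    )

On the command line, the equivalent is the '--distributed-executor-backend ray' flag quoted in the new error message; '--nnodes' is the multiprocessing-side option the same message refers to.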