Better error when world size is larger than node and distributed_executor_backend is not set (#30140)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor, 2025-12-06 04:53:52 +00:00 (committed by GitHub)
Parent: 7e31c3a3f6
Commit: bf4a901af9

@@ -184,13 +184,14 @@ class ParallelConfig:
     distributed_executor_backend: (
         str | DistributedExecutorBackend | type[Executor] | None
     ) = None
-    """Backend to use for distributed model
-    workers, either "ray" or "mp" (multiprocessing). If the product
-    of pipeline_parallel_size and tensor_parallel_size is less than
-    or equal to the number of GPUs available, "mp" will be used to
-    keep processing on a single host. Otherwise, this will default
-    to "ray" if Ray is installed and fail otherwise. Note that tpu
-    only support Ray for distributed inference."""
+    """Backend to use for distributed model workers, either "ray" or "mp"
+    (multiprocessing). If the product of pipeline_parallel_size and tensor_parallel_size
+    is less than or equal to the number of GPUs available, "mp" will be used to
+    keep processing on a single host. Otherwise, an error will be raised. To use "mp"
+    you must also set nnodes, and to use "ray" you must manually set
+    distributed_executor_backend to "ray".
+    Note that tpu only support Ray for distributed inference."""
     worker_cls: str = "auto"
     """The full name of the worker class to use. If "auto", the worker class
@@ -566,8 +567,11 @@ class ParallelConfig:
         ):
             gpu_count = cuda_device_count_stateless()
             raise ValueError(
-                f"Tensor parallel size ({self.world_size}) cannot be "
-                f"larger than the number of available GPUs ({gpu_count})."
+                f"World size ({self.world_size}) is larger than the number of "
+                f"available GPUs ({gpu_count}) in this node. If this is "
+                "intentional and you are using:\n"
+                "- ray, set '--distributed-executor-backend ray'.\n"
+                "- multiprocessing, set '--nnodes' appropriately."
             )
         elif self.data_parallel_backend == "ray":
             logger.info(
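
The enclosing condition is only partially visible in this hunk. As a minimal sketch (not the actual vLLM code), the check the new message belongs to could be expressed as below, assuming the world size is the product of the pipeline- and tensor-parallel sizes; the helper name is hypothetical.

```python
# Hypothetical sketch of the validation this commit improves: if no
# distributed_executor_backend was configured and the requested world size
# exceeds the GPUs visible on this node, raise an actionable error instead
# of silently defaulting to Ray.
def check_world_size_fits_node(world_size: int, backend: str | None, gpu_count: int) -> None:
    if backend is None and world_size > gpu_count:
        raise ValueError(
            f"World size ({world_size}) is larger than the number of "
            f"available GPUs ({gpu_count}) in this node. If this is "
            "intentional and you are using:\n"
            "- ray, set '--distributed-executor-backend ray'.\n"
            "- multiprocessing, set '--nnodes' appropriately."
        )
```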