[Doc] Update help text for --distributed-executor-backend (#10231)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
2026-01-28 17:37:15 +08:00 · 2024-11-11 20:55:09 -05:00 · 2024-11-11 20:55:09 -05:00 · 9cdba9669c
commit 9cdba9669c
parent d1c6799b88
2 changed files with 14 additions and 6 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -951,9 +951,12 @@ class ParallelConfig:
            https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
        placement_group: ray distributed model workers placement group.
        distributed_executor_backend: Backend to use for distributed model
-            workers, either "ray" or "mp" (multiprocessing). If either
-            pipeline_parallel_size or tensor_parallel_size is greater than 1,
-            will default to "ray" if Ray is installed or "mp" otherwise.
+            workers, either "ray" or "mp" (multiprocessing). If the product
+            of pipeline_parallel_size and tensor_parallel_size is less than
+            or equal to the number of GPUs available, "mp" will be used to
+            keep processing on a single host. Otherwise, this will default
+            to "ray" if Ray is installed and fail otherwise. Note that tpu
+            and hpu only support Ray for distributed inference.
    """

    def __init__(
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -369,9 +369,14 @@ class EngineArgs:
            '--distributed-executor-backend',
            choices=['ray', 'mp'],
            default=EngineArgs.distributed_executor_backend,
-            help='Backend to use for distributed serving. When more than 1 GPU '
-            'is used, will be automatically set to "ray" if installed '
-            'or "mp" (multiprocessing) otherwise.')
+            help='Backend to use for distributed model '
+            'workers, either "ray" or "mp" (multiprocessing). If the product '
+            'of pipeline_parallel_size and tensor_parallel_size is less than '
+            'or equal to the number of GPUs available, "mp" will be used to '
+            'keep processing on a single host. Otherwise, this will default '
+            'to "ray" if Ray is installed and fail otherwise. Note that tpu '
+            'and hpu only support Ray for distributed inference.')
+
        parser.add_argument(
            '--worker-use-ray',
            action='store_true',