[Doc] Update help text for --distributed-executor-backend (#10231)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
Author: Russell Bryant <rbryant@redhat.com>
Date: 2024-11-11 20:55:09 -05:00 (committed by GitHub)
parent d1c6799b88
commit 9cdba9669c
2 changed files with 14 additions and 6 deletions


@@ -951,9 +951,12 @@ class ParallelConfig:
             https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
         placement_group: ray distributed model workers placement group.
         distributed_executor_backend: Backend to use for distributed model
-            workers, either "ray" or "mp" (multiprocessing). If either
-            pipeline_parallel_size or tensor_parallel_size is greater than 1,
-            will default to "ray" if Ray is installed or "mp" otherwise.
+            workers, either "ray" or "mp" (multiprocessing). If the product
+            of pipeline_parallel_size and tensor_parallel_size is less than
+            or equal to the number of GPUs available, "mp" will be used to
+            keep processing on a single host. Otherwise, this will default
+            to "ray" if Ray is installed and fail otherwise. Note that tpu
+            and hpu only support Ray for distributed inference.
     """
 
     def __init__(
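The docstring above encodes a concrete selection rule: use "mp" whenever the
requested world size fits on one host, and fall back to Ray only for
multi-host runs. A minimal sketch of that rule in plain Python follows; the
function and parameter names (resolve_executor_backend, gpu_count) are
illustrative assumptions, not vLLM's actual internals.

    # Sketch of the backend-selection rule described in the docstring above.
    # Hypothetical helper; vLLM's real logic lives elsewhere and differs in detail.
    import importlib.util

    def resolve_executor_backend(pipeline_parallel_size: int,
                                 tensor_parallel_size: int,
                                 gpu_count: int) -> str:
        world_size = pipeline_parallel_size * tensor_parallel_size
        if world_size <= gpu_count:
            # All workers fit on a single host, so multiprocessing suffices.
            return "mp"
        if importlib.util.find_spec("ray") is not None:
            # Spanning multiple hosts requires Ray.
            return "ray"
        raise RuntimeError(
            "world size exceeds local GPUs and Ray is not installed")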


@@ -369,9 +369,14 @@ class EngineArgs:
             '--distributed-executor-backend',
             choices=['ray', 'mp'],
             default=EngineArgs.distributed_executor_backend,
-            help='Backend to use for distributed serving. When more than 1 GPU '
-            'is used, will be automatically set to "ray" if installed '
-            'or "mp" (multiprocessing) otherwise.')
+            help='Backend to use for distributed model '
+            'workers, either "ray" or "mp" (multiprocessing). If the product '
+            'of pipeline_parallel_size and tensor_parallel_size is less than '
+            'or equal to the number of GPUs available, "mp" will be used to '
+            'keep processing on a single host. Otherwise, this will default '
+            'to "ray" if Ray is installed and fail otherwise. Note that tpu '
+            'and hpu only support Ray for distributed inference.')
         parser.add_argument(
             '--worker-use-ray',
             action='store_true',
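For reference, the backend can also be pinned explicitly rather than left to
auto-selection, either on the command line or through the Python API. A brief
usage sketch; the model name and parallel sizes are placeholders.

    # CLI (model name is a placeholder):
    #   vllm serve facebook/opt-125m --tensor-parallel-size 2 \
    #       --distributed-executor-backend mp

    # Python API: kwargs are forwarded to EngineArgs.
    from vllm import LLM

    llm = LLM(model="facebook/opt-125m",
              tensor_parallel_size=2,
              distributed_executor_backend="mp")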