[Doc] Update help text for --distributed-executor-backend (#10231)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
Author: Russell Bryant <rbryant@redhat.com>
Date: 2024-11-11 20:55:09 -05:00 (committed by GitHub)
parent d1c6799b88
commit 9cdba9669c
2 changed files with 14 additions and 6 deletions


@@ -951,9 +951,12 @@ class ParallelConfig:
             https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
         placement_group: ray distributed model workers placement group.
         distributed_executor_backend: Backend to use for distributed model
-            workers, either "ray" or "mp" (multiprocessing). If either
-            pipeline_parallel_size or tensor_parallel_size is greater than 1,
-            will default to "ray" if Ray is installed or "mp" otherwise.
+            workers, either "ray" or "mp" (multiprocessing). If the product
+            of pipeline_parallel_size and tensor_parallel_size is less than
+            or equal to the number of GPUs available, "mp" will be used to
+            keep processing on a single host. Otherwise, this will default
+            to "ray" if Ray is installed and fail otherwise. Note that tpu
+            and hpu only support Ray for distributed inference.
     """
 
     def __init__(
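The docstring above encodes a concrete selection rule: use "mp" whenever the
requested world size fits on one host, and fall back to Ray only for
multi-host runs. A minimal sketch of that rule in plain Python follows; the
function and parameter names (resolve_executor_backend, gpu_count) are
illustrative assumptions, not vLLM's actual internals.

    # Sketch of the backend-selection rule described in the docstring above.
    # Hypothetical helper; vLLM's real logic lives elsewhere and differs in detail.
    import importlib.util

    def resolve_executor_backend(pipeline_parallel_size: int,
                                 tensor_parallel_size: int,
                                 gpu_count: int) -> str:
        world_size = pipeline_parallel_size * tensor_parallel_size
        if world_size <= gpu_count:
            # All workers fit on a single host, so multiprocessing suffices.
            return "mp"
        if importlib.util.find_spec("ray") is not None:
            # Spanning multiple hosts requires Ray.
            return "ray"
        raise RuntimeError(
            "world size exceeds local GPUs and Ray is not installed")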


@@ -369,9 +369,14 @@ class EngineArgs:
             '--distributed-executor-backend',
             choices=['ray', 'mp'],
             default=EngineArgs.distributed_executor_backend,
-            help='Backend to use for distributed serving. When more than 1 GPU '
-            'is used, will be automatically set to "ray" if installed '
-            'or "mp" (multiprocessing) otherwise.')
+            help='Backend to use for distributed model '
+            'workers, either "ray" or "mp" (multiprocessing). If the product '
+            'of pipeline_parallel_size and tensor_parallel_size is less than '
+            'or equal to the number of GPUs available, "mp" will be used to '
+            'keep processing on a single host. Otherwise, this will default '
+            'to "ray" if Ray is installed and fail otherwise. Note that tpu '
+            'and hpu only support Ray for distributed inference.')
         parser.add_argument(
             '--worker-use-ray',
             action='store_true',
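For reference, the backend can also be pinned explicitly rather than left to
auto-selection, either on the command line or through the Python API. A brief
usage sketch; the model name and parallel sizes are placeholders.

    # CLI (model name is a placeholder):
    #   vllm serve facebook/opt-125m --tensor-parallel-size 2 \
    #       --distributed-executor-backend mp

    # Python API: kwargs are forwarded to EngineArgs.
    from vllm import LLM

    llm = LLM(model="facebook/opt-125m",
              tensor_parallel_size=2,
              distributed_executor_backend="mp")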