diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 119505f08571d..9a02bf83848cb 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1102,6 +1102,7 @@ class EngineArgs: placement_group = ray.util.get_current_placement_group() data_parallel_external_lb = self.data_parallel_rank is not None + # data_parallel_rank is set, so local DP size is 1; use pure-external LB. if data_parallel_external_lb: assert self.data_parallel_size_local in (1, None), ( "data_parallel_size_local must be 1 when data_parallel_rank " @@ -1109,6 +1110,7 @@ class EngineArgs: data_parallel_size_local = 1 # Use full external lb if we have local_size of 1. self.data_parallel_hybrid_lb = False + # data_parallel_hybrid_lb is set; use hybrid LB. elif self.data_parallel_hybrid_lb: assert self.data_parallel_start_rank is not None, ( "data_parallel_start_rank must be set to use " diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py index e26642b2f65e4..01f1c1e37155b 100644 --- a/vllm/entrypoints/cli/serve.py +++ b/vllm/entrypoints/cli/serve.py @@ -86,8 +86,8 @@ def run_headless(args: argparse.Namespace): if not envs.VLLM_USE_V1: raise ValueError("Headless mode is only supported for V1") - if engine_args.data_parallel_rank is not None: - raise ValueError("data_parallel_rank is not applicable in " + if engine_args.data_parallel_hybrid_lb: + raise ValueError("data_parallel_hybrid_lb is not applicable in " "headless mode") parallel_config = vllm_config.parallel_config