diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py
index 21910d1160bd4..2936855c84db5 100644
--- a/vllm/v1/executor/ray_utils.py
+++ b/vllm/v1/executor/ray_utils.py
@@ -329,14 +329,17 @@ def initialize_ray_cluster(
     available_gpus = cuda_device_count_stateless()
     if parallel_config.world_size > available_gpus:
         logger.warning(
-            "Tensor parallel size (%d) exceeds available GPUs (%d). "
-            "This may result in Ray placement group allocation failures. "
-            "Consider reducing tensor_parallel_size to %d or less, "
-            "or ensure your Ray cluster has %d GPUs available.",
+            "World size (%d) exceeds locally visible GPUs (%d). "
+            "For single-node deployments, this may result in Ray "
+            "placement group allocation failures. For multi-node Ray "
+            "clusters, ensure your cluster has %d GPUs available across "
+            "all nodes. (world_size = tensor_parallel_size=%d × "
+            "pipeline_parallel_size=%d)",
             parallel_config.world_size,
             available_gpus,
-            available_gpus,
             parallel_config.world_size,
+            parallel_config.tensor_parallel_size,
+            parallel_config.pipeline_parallel_size,
         )

     if ray.is_initialized():