From 187bd5f7f33b183a413b37533ac2719388df58a2 Mon Sep 17 00:00:00 2001
From: yurekami
Date: Thu, 25 Dec 2025 01:20:43 +0900
Subject: [PATCH] fix(ray): correct misleading warning message for multi-node
 clusters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #31005

The warning message incorrectly stated "Tensor parallel size" when it
was actually comparing the world_size (TP × PP) against locally visible
GPUs. This was confusing for multi-node Ray deployments where:

1. The world_size includes both tensor and pipeline parallelism
2. The "available GPUs" count only reflects the local node, not the
   full Ray cluster

Changes:
- Replaced "Tensor parallel size" with "World size"
- Clarified that the GPU count is for locally visible devices
- Added context about multi-node Ray clusters
- Included the actual TP and PP values in the message for clarity

Signed-off-by: yurekami

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 vllm/v1/executor/ray_utils.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py
index 21910d1160bd4..2936855c84db5 100644
--- a/vllm/v1/executor/ray_utils.py
+++ b/vllm/v1/executor/ray_utils.py
@@ -329,14 +329,17 @@ def initialize_ray_cluster(
     available_gpus = cuda_device_count_stateless()
     if parallel_config.world_size > available_gpus:
         logger.warning(
-            "Tensor parallel size (%d) exceeds available GPUs (%d). "
-            "This may result in Ray placement group allocation failures. "
-            "Consider reducing tensor_parallel_size to %d or less, "
-            "or ensure your Ray cluster has %d GPUs available.",
+            "World size (%d) exceeds locally visible GPUs (%d). "
+            "For single-node deployments, this may result in Ray "
+            "placement group allocation failures. For multi-node Ray "
+            "clusters, ensure your cluster has %d GPUs available across "
+            "all nodes. (world_size = tensor_parallel_size=%d × "
+            "pipeline_parallel_size=%d)",
             parallel_config.world_size,
             available_gpus,
-            available_gpus,
             parallel_config.world_size,
+            parallel_config.tensor_parallel_size,
+            parallel_config.pipeline_parallel_size,
         )
 
     if ray.is_initialized():