From 187bd5f7f33b183a413b37533ac2719388df58a2 Mon Sep 17 00:00:00 2001
From: yurekami
Date: Thu, 25 Dec 2025 01:20:43 +0900
Subject: [PATCH] fix(ray): correct misleading warning message for multi-node
 clusters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #31005

The warning message incorrectly stated "Tensor parallel size" when it
was actually comparing the world_size (TP × PP) against locally visible
GPUs. This was confusing for multi-node Ray deployments where:

1. The world_size includes both tensor and pipeline parallelism
2. The "available GPUs" count only reflects the local node, not the
   full Ray cluster

Changes:
- Replaced "Tensor parallel size" with "World size"
- Clarified that the GPU count is for locally visible devices
- Added context about multi-node Ray clusters
- Included the actual TP and PP values in the message for clarity

Signed-off-by: yurekami

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 vllm/v1/executor/ray_utils.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py
index 21910d1160bd4..2936855c84db5 100644
--- a/vllm/v1/executor/ray_utils.py
+++ b/vllm/v1/executor/ray_utils.py
@@ -329,14 +329,17 @@ def initialize_ray_cluster(
     available_gpus = cuda_device_count_stateless()
     if parallel_config.world_size > available_gpus:
         logger.warning(
-            "Tensor parallel size (%d) exceeds available GPUs (%d). "
-            "This may result in Ray placement group allocation failures. "
-            "Consider reducing tensor_parallel_size to %d or less, "
-            "or ensure your Ray cluster has %d GPUs available.",
+            "World size (%d) exceeds locally visible GPUs (%d). "
+            "For single-node deployments, this may result in Ray "
+            "placement group allocation failures. For multi-node Ray "
+            "clusters, ensure your cluster has %d GPUs available across "
+            "all nodes. (world_size = tensor_parallel_size=%d × "
+            "pipeline_parallel_size=%d)",
             parallel_config.world_size,
             available_gpus,
-            available_gpus,
             parallel_config.world_size,
+            parallel_config.tensor_parallel_size,
+            parallel_config.pipeline_parallel_size,
         )
 
     if ray.is_initialized():