From 33d560001e300e7db3b089c97ebf85801297d9ec Mon Sep 17 00:00:00 2001
From: Ricardo Decal <crypdick@users.noreply.github.com>
Date: Tue, 15 Jul 2025 06:55:45 -0400
Subject: [PATCH] [Docs] Improve documentation for ray cluster launcher helper
 script (#20602)

Signed-off-by: Ricardo Decal <rdecal@anyscale.com>
---
 examples/online_serving/run_cluster.sh | 74 +++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 12 deletions(-)

diff --git a/examples/online_serving/run_cluster.sh b/examples/online_serving/run_cluster.sh
index 7b4b40b4b7e23..522b9566212bb 100644
--- a/examples/online_serving/run_cluster.sh
+++ b/examples/online_serving/run_cluster.sh
@@ -1,35 +1,81 @@
 #!/bin/bash
+#
+# Launch a Ray cluster inside Docker for vLLM inference.
+#
+# This script can start either a head node or a worker node, depending on the
+# --head or --worker flag provided as the third positional argument.
+#
+# Usage:
+# 1. Designate one machine as the head node and execute:
+#    bash run_cluster.sh \
+#         vllm/vllm-openai \
+#         <head_node_ip> \
+#         --head \
+#         /abs/path/to/huggingface/cache \
+#         -e VLLM_HOST_IP=<head_node_ip>
+#
+# 2. On every worker machine, execute:
+#    bash run_cluster.sh \
+#         vllm/vllm-openai \
+#         <head_node_ip> \
+#         --worker \
+#         /abs/path/to/huggingface/cache \
+#         -e VLLM_HOST_IP=<worker_node_ip>
+#
+# Each worker requires a unique VLLM_HOST_IP value.
+# Keep each terminal session open. Closing a session stops the associated Ray
+# node and thereby shuts down the entire cluster.
+# Every machine must be reachable at the supplied IP address.
+#
+# The container is named "node-<random_suffix>" (run `docker ps` to find it).
+# To open a shell inside a container after launch, use:
+#       docker exec -it node-<random_suffix> /bin/bash
+#
+# Then, you can execute vLLM commands on the Ray cluster as if it were a
+# single machine, e.g. vllm serve ...
+#
+# To stop the container, use:
+#       docker stop node-<random_suffix>
 
-# Check for minimum number of required arguments
+# Check for minimum number of required arguments.
 if [ $# -lt 4 ]; then
-    echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]"
+    echo "Usage: $0 docker_image head_node_ip --head|--worker path_to_hf_home [additional_args...]"
     exit 1
 fi
 
-# Assign the first three arguments and shift them away
+# Extract the mandatory positional arguments and remove them from $@.
 DOCKER_IMAGE="$1"
 HEAD_NODE_ADDRESS="$2"
-NODE_TYPE="$3"  # Should be --head or --worker
+NODE_TYPE="$3"  # Should be --head or --worker.
 PATH_TO_HF_HOME="$4"
 shift 4
 
-# Additional arguments are passed directly to the Docker command
+# Preserve any extra arguments so they can be forwarded to Docker.
 ADDITIONAL_ARGS=("$@")
 
-# Validate node type
+# Validate the NODE_TYPE argument.
 if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then
     echo "Error: Node type must be --head or --worker"
     exit 1
 fi
 
-# Define a function to cleanup on EXIT signal
+# Generate a container name that is very likely unique, using a random suffix.
+# Docker container names must be unique on each host.
+# The random suffix allows multiple Ray containers to run simultaneously on the same machine,
+# for example, on a multi-GPU machine.
+CONTAINER_NAME="node-${RANDOM}"
+
+# Define a cleanup routine that removes the container when the script exits.
+# This prevents orphaned containers from accumulating if the script is interrupted.
 cleanup() {
-    docker stop node
-    docker rm node
+    docker stop "${CONTAINER_NAME}"
+    docker rm "${CONTAINER_NAME}"
 }
 trap cleanup EXIT
 
-# Command setup for head or worker node
+# Build the Ray start command based on the node role.
+# The head node manages the cluster and accepts connections on port 6379,
+# while each worker connects to the head node's address on that same port.
 RAY_START_CMD="ray start --block"
 if [ "${NODE_TYPE}" == "--head" ]; then
     RAY_START_CMD+=" --head --port=6379"
@@ -37,11 +83,15 @@ else
     RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379"
 fi
 
-# Run the docker command with the user specified parameters and additional arguments
+# Launch the container with the assembled parameters.
+# --network host: Allows Ray nodes to communicate directly via host networking
+# --shm-size 10.24g: Enlarges the container's shared memory (/dev/shm)
+# --gpus all: Gives the container access to all GPUs on the host
+# -v PATH_TO_HF_HOME: Mounts the Hugging Face cache to avoid re-downloading models
 docker run \
     --entrypoint /bin/bash \
     --network host \
-    --name node \
+    --name "${CONTAINER_NAME}" \
     --shm-size 10.24g \
     --gpus all \
     -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \