From 33d560001e300e7db3b089c97ebf85801297d9ec Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 15 Jul 2025 06:55:45 -0400 Subject: [PATCH] [Docs] Improve documentation for ray cluster launcher helper script (#20602) Signed-off-by: Ricardo Decal --- examples/online_serving/run_cluster.sh | 74 +++++++++++++++++++++----- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/examples/online_serving/run_cluster.sh b/examples/online_serving/run_cluster.sh index 7b4b40b4b7e23..522b9566212bb 100644 --- a/examples/online_serving/run_cluster.sh +++ b/examples/online_serving/run_cluster.sh @@ -1,35 +1,81 @@ #!/bin/bash +# +# Launch a Ray cluster inside Docker for vLLM inference. +# +# This script can start either a head node or a worker node, depending on the +# --head or --worker flag provided as the third positional argument. +# +# Usage: +# 1. Designate one machine as the head node and execute: +# bash run_cluster.sh \ +# vllm/vllm-openai \ +# <head_node_ip> \ +# --head \ +# /abs/path/to/huggingface/cache \ +# -e VLLM_HOST_IP=<head_node_ip> +# +# 2. On every worker machine, execute: +# bash run_cluster.sh \ +# vllm/vllm-openai \ +# <head_node_ip> \ +# --worker \ +# /abs/path/to/huggingface/cache \ +# -e VLLM_HOST_IP=<worker_node_ip> +# +# Each worker requires a unique VLLM_HOST_IP value. +# Keep each terminal session open. Closing a session stops the associated Ray +# node and thereby shuts down the entire cluster. +# Every machine must be reachable at the supplied IP address. +# +# The container is named "node-<random_suffix>". To open a shell inside +# a container after launch, use: +# docker exec -it node-<random_suffix> /bin/bash +# +# Then, you can execute vLLM commands on the Ray cluster as if it were a +# single machine, e.g. vllm serve ... +# +# To stop the container, use: +# docker stop node-<random_suffix> -# Check for minimum number of required arguments +# Check for minimum number of required arguments. 
if [ $# -lt 4 ]; then - echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]" + echo "Usage: $0 docker_image head_node_ip --head|--worker path_to_hf_home [additional_args...]" exit 1 fi -# Assign the first three arguments and shift them away +# Extract the mandatory positional arguments and remove them from $@. DOCKER_IMAGE="$1" HEAD_NODE_ADDRESS="$2" -NODE_TYPE="$3" # Should be --head or --worker +NODE_TYPE="$3" # Should be --head or --worker. PATH_TO_HF_HOME="$4" shift 4 -# Additional arguments are passed directly to the Docker command +# Preserve any extra arguments so they can be forwarded to Docker. ADDITIONAL_ARGS=("$@") -# Validate node type +# Validate the NODE_TYPE argument. if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then echo "Error: Node type must be --head or --worker" exit 1 fi -# Define a function to cleanup on EXIT signal +# Generate a container name with a random suffix (bash $RANDOM, 0-32767); collisions are unlikely but not impossible. +# Docker container names must be unique on each host. +# The random suffix allows multiple Ray containers to run simultaneously on the same machine, +# for example, on a multi-GPU machine. +CONTAINER_NAME="node-${RANDOM}" + +# Define a cleanup routine that removes the container when the script exits. +# This prevents orphaned containers from accumulating if the script is interrupted. cleanup() { - docker stop node - docker rm node + docker stop "${CONTAINER_NAME}" + docker rm "${CONTAINER_NAME}" } trap cleanup EXIT -# Command setup for head or worker node +# Build the Ray start command based on the node role. +# The head node manages the cluster and accepts connections on port 6379, +# while workers connect to the head's address. 
RAY_START_CMD="ray start --block" if [ "${NODE_TYPE}" == "--head" ]; then RAY_START_CMD+=" --head --port=6379" @@ -37,11 +83,15 @@ else RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379" fi -# Run the docker command with the user specified parameters and additional arguments +# Launch the container with the assembled parameters. +# --network host: Allows Ray nodes to communicate directly via host networking +# --shm-size 10.24g: Increases shared memory +# --gpus all: Gives container access to all GPUs on the host +# -v HF_HOME: Mounts HuggingFace cache to avoid re-downloading models docker run \ --entrypoint /bin/bash \ --network host \ - --name node \ + --name "${CONTAINER_NAME}" \ --shm-size 10.24g \ --gpus all \ -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \