mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 14:17:16 +08:00
[Docs] Improve documentation for ray cluster launcher helper script (#20602)
Signed-off-by: Ricardo Decal <rdecal@anyscale.com>
This commit is contained in:
parent
f148c44c6a
commit
33d560001e
@ -1,35 +1,81 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Launch a Ray cluster inside Docker for vLLM inference.
|
||||
#
|
||||
# This script can start either a head node or a worker node, depending on the
|
||||
# --head or --worker flag provided as the third positional argument.
|
||||
#
|
||||
# Usage:
|
||||
# 1. Designate one machine as the head node and execute:
|
||||
# bash run_cluster.sh \
|
||||
# vllm/vllm-openai \
|
||||
# <head_node_ip> \
|
||||
# --head \
|
||||
# /abs/path/to/huggingface/cache \
|
||||
# -e VLLM_HOST_IP=<head_node_ip>
|
||||
#
|
||||
# 2. On every worker machine, execute:
|
||||
# bash run_cluster.sh \
|
||||
# vllm/vllm-openai \
|
||||
# <head_node_ip> \
|
||||
# --worker \
|
||||
# /abs/path/to/huggingface/cache \
|
||||
# -e VLLM_HOST_IP=<worker_node_ip>
|
||||
#
|
||||
# Each worker requires a unique VLLM_HOST_IP value.
|
||||
# Keep each terminal session open. Closing a session stops the associated Ray
|
||||
# node and thereby shuts down the entire cluster.
|
||||
# Every machine must be reachable at the supplied IP address.
|
||||
#
|
||||
# The container is named "node-<random_suffix>". To open a shell inside
|
||||
# a container after launch, use:
|
||||
# docker exec -it node-<random_suffix> /bin/bash
|
||||
#
|
||||
# Then, you can execute vLLM commands on the Ray cluster as if it were a
|
||||
# single machine, e.g. vllm serve ...
|
||||
#
|
||||
# To stop the container, use:
|
||||
# docker stop node-<random_suffix>
|
||||
|
||||
# Check for minimum number of required arguments
|
||||
# Check for minimum number of required arguments.
|
||||
if [ $# -lt 4 ]; then
|
||||
echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]"
|
||||
echo "Usage: $0 docker_image head_node_ip --head|--worker path_to_hf_home [additional_args...]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Assign the first three arguments and shift them away
|
||||
# Extract the mandatory positional arguments and remove them from $@.
|
||||
DOCKER_IMAGE="$1"
|
||||
HEAD_NODE_ADDRESS="$2"
|
||||
NODE_TYPE="$3" # Should be --head or --worker
|
||||
NODE_TYPE="$3" # Should be --head or --worker.
|
||||
PATH_TO_HF_HOME="$4"
|
||||
shift 4
|
||||
|
||||
# Additional arguments are passed directly to the Docker command
|
||||
# Preserve any extra arguments so they can be forwarded to Docker.
|
||||
ADDITIONAL_ARGS=("$@")
|
||||
|
||||
# Validate node type
|
||||
# Validate the NODE_TYPE argument.
|
||||
if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then
|
||||
echo "Error: Node type must be --head or --worker"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Define a function to cleanup on EXIT signal
|
||||
# Generate a unique container name with random suffix.
|
||||
# Docker container names must be unique on each host.
|
||||
# The random suffix allows multiple Ray containers to run simultaneously on the same machine,
|
||||
# for example, on a multi-GPU machine.
|
||||
CONTAINER_NAME="node-${RANDOM}"
|
||||
|
||||
# Define a cleanup routine that removes the container when the script exits.
|
||||
# This prevents orphaned containers from accumulating if the script is interrupted.
|
||||
cleanup() {
|
||||
docker stop node
|
||||
docker rm node
|
||||
docker stop "${CONTAINER_NAME}"
|
||||
docker rm "${CONTAINER_NAME}"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Command setup for head or worker node
|
||||
# Build the Ray start command based on the node role.
|
||||
# The head node manages the cluster and accepts connections on port 6379,
|
||||
# while workers connect to the head's address.
|
||||
RAY_START_CMD="ray start --block"
|
||||
if [ "${NODE_TYPE}" == "--head" ]; then
|
||||
RAY_START_CMD+=" --head --port=6379"
|
||||
@ -37,11 +83,15 @@ else
|
||||
RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379"
|
||||
fi
|
||||
|
||||
# Run the docker command with the user specified parameters and additional arguments
|
||||
# Launch the container with the assembled parameters.
|
||||
# --network host: Allows Ray nodes to communicate directly via host networking
|
||||
# --shm-size 10.24g: Increases shared memory
|
||||
# --gpus all: Gives container access to all GPUs on the host
|
||||
# -v HF_HOME: Mounts HuggingFace cache to avoid re-downloading models
|
||||
docker run \
|
||||
--entrypoint /bin/bash \
|
||||
--network host \
|
||||
--name node \
|
||||
--name "${CONTAINER_NAME}" \
|
||||
--shm-size 10.24g \
|
||||
--gpus all \
|
||||
-v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user