mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 18:25:45 +08:00
[TPU][Test] Add script to run benchmark on TPU for buildkite (#19039)
Signed-off-by: Qiliang Cui <derrhein@gmail.com>
This commit is contained in:
parent
84166fee97
commit
66c508b137
24
.buildkite/scripts/tpu/cleanup_docker.sh
Executable file
24
.buildkite/scripts/tpu/cleanup_docker.sh
Executable file
@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
#
# Frees Docker disk space on the build host.
#
# Looks up Docker's root directory, measures how full the filesystem holding
# it is, and prunes images/volumes when usage is above the threshold.
# Exits non-zero if the Docker root directory cannot be determined or if any
# command fails (strict mode).

set -euo pipefail

docker_root=$(docker info -f '{{.DockerRootDir}}')
if [ -z "$docker_root" ]; then
  echo "Failed to determine Docker root directory."
  exit 1
fi
echo "Docker root directory: $docker_root"

# Check disk usage of the filesystem where Docker's root directory is located.
# `-P` selects the POSIX output format: exactly one line per filesystem, so a
# long device name cannot wrap the record and shift the capacity column.
disk_usage=$(df -P "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')

# Define the threshold (percent of the filesystem in use).
threshold=70

if [ "$disk_usage" -gt "$threshold" ]; then
  echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
  # Remove dangling images (those that are not tagged and not used by any container)
  docker image prune -f
  # Remove unused volumes / force the system prune for old images as well.
  docker volume prune -f && docker system prune --force --filter "until=72h" --all
  echo "Docker images and volumes cleanup completed."
else
  echo "Disk usage is below $threshold%. No cleanup needed."
fi
|
||||||
14
.buildkite/scripts/tpu/config_v6e_1.env
Normal file
14
.buildkite/scripts/tpu/config_v6e_1.env
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# Environment config
|
||||||
|
# Prefix of the log file names that docker_run_bm.sh copies out of the container.
TEST_NAME=llama8b
|
||||||
|
# Name given to the benchmark container by docker_run_bm.sh.
CONTAINER_NAME=vllm-tpu
|
||||||
|
|
||||||
|
# vllm config
|
||||||
|
# Model passed to `vllm serve` and to benchmark_serving.py (--model).
MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||||
|
# Passed to `vllm serve` as --max-num-seqs.
MAX_NUM_SEQS=512
|
||||||
|
# Passed to `vllm serve` as --max-num-batched-tokens.
MAX_NUM_BATCHED_TOKENS=512
|
||||||
|
# Passed to `vllm serve` as --tensor-parallel-size.
TENSOR_PARALLEL_SIZE=1
|
||||||
|
# Passed to `vllm serve` as --max-model-len.
MAX_MODEL_LEN=2048
|
||||||
|
# Must already exist on the host; bind-mounted into the container at the same
# path and passed to `vllm serve` as --download_dir.
DOWNLOAD_DIR=/mnt/disks/persist
|
||||||
|
# docker_run_bm.sh fails the run when the measured request throughput (req/s)
# is lower than this value.
EXPECTED_THROUGHPUT=8.0
|
||||||
|
# Passed to benchmark_serving.py as --sonnet-input-len.
INPUT_LEN=1800
|
||||||
|
# Passed to benchmark_serving.py as --sonnet-output-len.
OUTPUT_LEN=128
|
||||||
102
.buildkite/scripts/tpu/docker_run_bm.sh
Executable file
102
.buildkite/scripts/tpu/docker_run_bm.sh
Executable file
@ -0,0 +1,102 @@
|
|||||||
|
#!/bin/bash
#
# Host-side driver for the TPU benchmark.
#
# Usage: docker_run_bm.sh <env_file>
#   <env_file>  path to a KEY=VALUE config file; it is sourced into this shell
#               and also handed to `docker run --env-file`.
# Exits 1 when the env file does not exist.

if [ ! -f "$1" ]; then
  echo "Error: The env file '$1' does not exist."
  exit 1 # Exit the script with a non-zero status to indicate an error
fi

ENV_FILE=$1

# For testing on local vm, use `set -a` to export all variables
source /etc/environment
# Quoted so that a path containing spaces or glob characters is sourced as-is.
source "$ENV_FILE"
|
||||||
|
|
||||||
|
#######################################
# Force-removes the containers this benchmark may have left behind.
# Globals:   CONTAINER_NAME (read)
# Arguments: none
# Returns:   always 0; a failed removal (e.g. the container does not exist)
#            is deliberately ignored so cleanup never aborts the caller.
#######################################
remove_docker_container() {
  docker rm -f tpu-test || true
  docker rm -f vllm-tpu || true
  # Quoted so the configured name is always passed as a single argument.
  docker rm -f "$CONTAINER_NAME" || true
}
|
||||||
|
|
||||||
|
# Remove the benchmark containers on every exit path of this script.
trap remove_docker_container EXIT

# Remove the container that might not be cleaned up in the previous run.
remove_docker_container

# Build docker image.
# TODO: build the image outside the script and share the image with other
# tpu test if building time is too long.
# This script does not run under `set -e`, so stop explicitly on a failed
# build instead of going on with a stale vllm/vllm-tpu-bm image.
if ! DOCKER_BUILDKIT=1 docker build \
  --build-arg max_jobs=16 \
  --build-arg USE_SCCACHE=1 \
  --build-arg GIT_REPO_CHECK=0 \
  --tag vllm/vllm-tpu-bm \
  --progress plain -f docker/Dockerfile.tpu .; then
  echo "Error: docker build failed."
  exit 1
fi

# `set -e` is not enabled here, so a mktemp failure has to be checked by hand.
if ! LOG_ROOT=$(mktemp -d); then
  echo "Error: failed to create a temporary directory for the logs."
  exit 1
fi
echo "Results will be stored in: $LOG_ROOT"

if [ -z "$HF_TOKEN" ]; then
  echo "Error: HF_TOKEN is not set or is empty."
  exit 1
fi

# Make sure mounted disk or dir exists
if [ ! -d "$DOWNLOAD_DIR" ]; then
  echo "Error: Folder $DOWNLOAD_DIR does not exist. This is useually a mounted drive. If no mounted drive, just create a folder."
  exit 1
fi

echo "Run model $MODEL"
echo

echo "starting docker...$CONTAINER_NAME"
echo
# Start a detached container that just idles (`tail -f /dev/null`); the
# benchmark is run inside it afterwards with `docker exec`.
docker run \
  -v "$DOWNLOAD_DIR:$DOWNLOAD_DIR" \
  --env-file "$ENV_FILE" \
  -e HF_TOKEN="$HF_TOKEN" \
  -e TARGET_COMMIT="$BUILDKITE_COMMIT" \
  -e MODEL="$MODEL" \
  -e WORKSPACE=/workspace \
  --name "$CONTAINER_NAME" \
  -d \
  --privileged \
  --network host \
  -v /dev/shm:/dev/shm \
  vllm/vllm-tpu-bm tail -f /dev/null

echo "run script..."
echo
# The benchmark script lives next to this one, under .buildkite/scripts/tpu/.
# Its exit status is not checked on purpose: the logs are still copied back
# below, and a failed run is caught by the throughput validation.
docker exec "$CONTAINER_NAME" /bin/bash -c ".buildkite/scripts/tpu/run_bm.sh"

echo "copy result back..."
VLLM_LOG="${LOG_ROOT}/${TEST_NAME}_vllm_log.txt"
BM_LOG="${LOG_ROOT}/${TEST_NAME}_bm_log.txt"
docker cp "$CONTAINER_NAME:/workspace/vllm_log.txt" "$VLLM_LOG"
docker cp "$CONTAINER_NAME:/workspace/bm_log.txt" "$BM_LOG"

# Keep only digits and dots from the throughput line of the benchmark log.
throughput=$(grep "Request throughput (req/s):" "$BM_LOG" | sed 's/[^0-9.]//g')
echo "throughput for $TEST_NAME at $BUILDKITE_COMMIT: $throughput"

if [ "$BUILDKITE" = "true" ]; then
  echo "Running inside Buildkite"
  buildkite-agent artifact upload "$VLLM_LOG"
  buildkite-agent artifact upload "$BM_LOG"
else
  echo "Not running inside Buildkite"
fi

#
# compare the throughput with EXPECTED_THROUGHPUT
# and assert meeting the expectation
#
# Reject an empty or non-numeric value before handing it to bc.
if [[ -z "$throughput" || ! "$throughput" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  echo "Failed to get the throughput"
  exit 1
fi

# bc prints 1 when the comparison holds, which (( )) treats as true.
if (( $(echo "$throughput < $EXPECTED_THROUGHPUT" | bc -l) )); then
  echo "Error: throughput($throughput) is less than expected($EXPECTED_THROUGHPUT)"
  exit 1
fi
|
||||||
94
.buildkite/scripts/tpu/run_bm.sh
Executable file
94
.buildkite/scripts/tpu/run_bm.sh
Executable file
@ -0,0 +1,94 @@
|
|||||||
|
#!/bin/bash
#
# Starts a vLLM server in the background, waits for it to come up, runs the
# sonnet serving benchmark against it and prints the measured throughput.
#
# Environment read by this script: WORKSPACE, TARGET_COMMIT (may be empty),
# MODEL, MAX_NUM_SEQS, MAX_NUM_BATCHED_TOKENS, TENSOR_PARALLEL_SIZE,
# DOWNLOAD_DIR, MAX_MODEL_LEN, INPUT_LEN, OUTPUT_LEN.
# Outputs: $WORKSPACE/vllm_log.txt (server log), $WORKSPACE/bm_log.txt
# (benchmark log).
# Exits non-zero when the checked-out commit does not match TARGET_COMMIT,
# when the server fails to start, or when any command fails (strict mode).

set -euo pipefail

VLLM_LOG="$WORKSPACE/vllm_log.txt"
BM_LOG="$WORKSPACE/bm_log.txt"

# When a target commit is given, refuse to benchmark any other checkout.
if [ -n "$TARGET_COMMIT" ]; then
  head_hash=$(git rev-parse HEAD)
  if [ "$TARGET_COMMIT" != "$head_hash" ]; then
    echo "Error: target commit $TARGET_COMMIT does not match HEAD: $head_hash"
    exit 1
  fi
fi

echo "model: $MODEL"
echo

#
# create a log folder
#
# -p: an already existing folder must not abort the run under `set -e`.
mkdir -p "$WORKSPACE/log"

# TODO: Move to image building.
pip install pandas
pip install datasets

#
# create sonnet_4x
#
echo "Create sonnet_4x.txt"
echo "" > benchmarks/sonnet_4x.txt
for _ in {1..4}; do
  cat benchmarks/sonnet.txt >> benchmarks/sonnet_4x.txt
done

#
# start vllm service in backend
#
echo "lanching vllm..."
echo "logging to $VLLM_LOG"
echo

VLLM_USE_V1=1 vllm serve "$MODEL" \
  --seed 42 \
  --disable-log-requests \
  --max-num-seqs "$MAX_NUM_SEQS" \
  --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS" \
  --tensor-parallel-size "$TENSOR_PARALLEL_SIZE" \
  --no-enable-prefix-caching \
  --download_dir "$DOWNLOAD_DIR" \
  --max-model-len "$MAX_MODEL_LEN" > "$VLLM_LOG" 2>&1 &
server_pid=$!

echo "wait for 20 minutes.."
echo
# Poll the server log every 10 seconds, at most 120 times (20 minutes).
server_started=0
for _ in {1..120}; do
  # TODO: detect other type of errors.
  if grep -Fq "raise RuntimeError" "$VLLM_LOG"; then
    echo "Detected RuntimeError, exiting."
    exit 1
  elif grep -Fq "Application startup complete" "$VLLM_LOG"; then
    echo "Application started"
    server_started=1
    break
  elif ! kill -0 "$server_pid" 2>/dev/null; then
    # The server process is gone, so waiting any longer cannot succeed.
    echo "vLLM server exited before startup completed, exiting."
    exit 1
  else
    echo "wait for 10 seconds..."
    sleep 10
  fi
done

# Do not benchmark a server that never reported a completed startup.
if [ "$server_started" -ne 1 ]; then
  echo "Timed out waiting for vLLM to start, exiting."
  exit 1
fi

#
# run test
#
echo "run benchmark test..."
echo "logging to $BM_LOG"
echo
python benchmarks/benchmark_serving.py \
  --backend vllm \
  --model "$MODEL" \
  --dataset-name sonnet \
  --dataset-path benchmarks/sonnet_4x.txt \
  --sonnet-input-len "$INPUT_LEN" \
  --sonnet-output-len "$OUTPUT_LEN" \
  --ignore-eos > "$BM_LOG"

echo "completed..."
echo

# Keep only digits and dots from the throughput line of the benchmark log.
throughput=$(grep "Request throughput (req/s):" "$BM_LOG" | sed 's/[^0-9.]//g')
echo "throughput: $throughput"
echo
|
||||||
Loading…
x
Reference in New Issue
Block a user