From ed540d6d4c12309f1b258b440e2803e00cc4eb0e Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 22 Oct 2025 02:18:01 -0700 Subject: [PATCH] Update release pipeline for PyTorch 2.9.0 (#27303) Signed-off-by: Huy Do --- .buildkite/release-pipeline.yaml | 38 ++++++++++++++--------------- .buildkite/scripts/upload-wheels.sh | 20 +++++---------- docker/Dockerfile.cpu | 2 +- 3 files changed, 26 insertions(+), 34 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 5bc59c151565f..afb83c249087c 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -1,5 +1,5 @@ steps: - # aarch64 + CUDA builds. PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9 + # aarch64 + CUDA builds - label: "Build arm64 wheel - CUDA 12.9" depends_on: ~ id: build-wheel-arm64-cuda-12-9 @@ -15,20 +15,21 @@ steps: env: DOCKER_BUILDKIT: "1" - # aarch64 build. + # aarch64 build - label: "Build arm64 CPU wheel" depends_on: ~ id: build-wheel-arm64-cpu agents: queue: arm64_cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile.cpu ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/scripts/upload-wheels.sh" env: DOCKER_BUILDKIT: "1" + # x86 + CUDA builds - label: "Build wheel - CUDA 12.8" depends_on: ~ id: build-wheel-cuda-12-8 @@ -42,20 +43,6 @@ steps: env: DOCKER_BUILDKIT: "1" - - label: "Build wheel - CUDA 12.6" - depends_on: ~ - id: build-wheel-cuda-12-6 - agents: - queue: cpu_queue_postmerge - commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.6.3 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - - "mkdir artifacts" - - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - - "bash .buildkite/scripts/upload-wheels.sh" - env: - DOCKER_BUILDKIT: "1" - - # x86 + CUDA builds - label: "Build wheel - CUDA 12.9" depends_on: ~ id: build-wheel-cuda-12-9 @@ -69,6 +56,20 @@ steps: env: DOCKER_BUILDKIT: "1" + - label: "Build wheel - CUDA 13.0" + depends_on: ~ + id: build-wheel-cuda-13-0 + agents: + queue: cpu_queue_postmerge + commands: + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." + - "mkdir artifacts" + - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" + - "bash .buildkite/scripts/upload-wheels.sh" + env: + DOCKER_BUILDKIT: "1" + + # Build release images (12.9) - label: "Build release image (x86)" depends_on: ~ id: build-release-image-x86 @@ -76,13 +77,12 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ." - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)" # re-tag to default image tag and push, just in case arm64 build fails - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT" - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT" - # PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9 - label: "Build release image (arm64)" depends_on: ~ id: build-release-image-arm64 diff --git a/.buildkite/scripts/upload-wheels.sh b/.buildkite/scripts/upload-wheels.sh index 43aa8c47be299..945c5e48c0090 100644 --- a/.buildkite/scripts/upload-wheels.sh +++ b/.buildkite/scripts/upload-wheels.sh @@ -58,33 +58,25 @@ python3 .buildkite/generate_index.py --wheel "$normal_wheel" aws s3 cp "$wheel" "s3://vllm-wheels/$BUILDKITE_COMMIT/" aws s3 cp "$normal_wheel" "s3://vllm-wheels/$BUILDKITE_COMMIT/" -if [[ $normal_wheel == *"cu126"* ]]; then - # if $normal_wheel matches cu126, do not upload the index.html - echo "Skipping index files for cu126 wheels" -elif [[ $normal_wheel == *"cu128"* ]]; then - # if $normal_wheel matches cu128, do not upload the index.html - echo "Skipping index files for cu128 wheels" -else +if [[ $normal_wheel == *"cu129"* ]]; then # only upload index.html for cu129 wheels (default wheels) as it # is available on both x86 and arm64 aws s3 cp index.html "s3://vllm-wheels/$BUILDKITE_COMMIT/vllm/index.html" aws s3 cp "s3://vllm-wheels/nightly/index.html" "s3://vllm-wheels/$BUILDKITE_COMMIT/index.html" +else + echo "Skipping index files for non-cu129 wheels" fi # generate index for nightly aws s3 cp "$wheel" "s3://vllm-wheels/nightly/" aws s3 cp "$normal_wheel" "s3://vllm-wheels/nightly/" -if [[ $normal_wheel == *"cu126"* ]]; then - # if $normal_wheel matches cu126, do not upload the index.html - echo "Skipping index files for cu126 wheels" -elif [[ $normal_wheel == *"cu128"* ]]; then - # if $normal_wheel matches cu128, do not upload the index.html - echo "Skipping index files for cu128 wheels" -else +if [[ $normal_wheel == *"cu129"* ]]; then # only upload index.html for cu129 wheels (default wheels) as it # is available on both x86 and arm64 aws s3 cp index.html "s3://vllm-wheels/nightly/vllm/index.html" +else + echo "Skipping index files for non-cu129 wheels" fi aws s3 cp "$wheel" "s3://vllm-wheels/$version/" diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 9c5de3e440867..5798e589edafb 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -31,7 +31,7 @@ ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ --mount=type=cache,target=/var/lib/apt,sharing=locked \ apt-get update -y \ - && apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \ + && apt-get install -y --no-install-recommends sudo ccache git curl wget ca-certificates \ gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof \ && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \ && curl -LsSf https://astral.sh/uv/install.sh | sh