From 0b9c701495e220038a0ef4e26037fa5edc7a6993 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 13:53:30 -0800 Subject: [PATCH] Adopt Dockerfile to build nightly version --- .buildkite/release-pipeline.yaml | 66 +++++++++++++++++++++++++ docker/Dockerfile | 82 ++++++++++++++++++++++++++++---- 2 files changed, 140 insertions(+), 8 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index a9d51557bd9bb..2477e60da4c16 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -196,3 +196,69 @@ steps: env: DOCKER_BUILDKIT: "1" DOCKERHUB_USERNAME: "vllmbot" + + # Build nightly torch Docker images (x86) + - label: "Build nightly torch image (x86)" + depends_on: ~ + id: build-nightly-torch-image-x86 + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 --target vllm-openai --progress plain -f docker/Dockerfile ." + - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" + + # Build nightly torch Docker images (arm64) + - label: "Build nightly torch image (arm64)" + depends_on: ~ + id: build-nightly-torch-image-arm64 + if: build.env("NIGHTLY") == "1" + agents: + queue: arm64_cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --target vllm-openai --progress plain -f docker/Dockerfile ." + - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" + + # Create multi-arch manifest for nightly torch images + - label: "Create nightly torch multi-arch manifest" + depends_on: + - build-nightly-torch-image-x86 + - build-nightly-torch-image-arm64 + id: create-nightly-torch-multi-arch-manifest + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --amend" + - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly" + + # Publish nightly torch images to DockerHub + - label: "Publish nightly torch images to DockerHub" + depends_on: + - create-nightly-torch-multi-arch-manifest + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" + - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" + - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-x86_64" + - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 vllm/vllm-openai:torch-nightly-aarch64" + - "docker push vllm/vllm-openai:torch-nightly-x86_64" + - "docker push vllm/vllm-openai:torch-nightly-aarch64" + - "docker manifest create vllm/vllm-openai:torch-nightly vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" + - "docker manifest create vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" + - "docker manifest push vllm/vllm-openai:torch-nightly" + - "docker manifest push vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT" + plugins: + - docker-login#v3.0.0: + username: vllmbot + password-env: DOCKERHUB_TOKEN + env: + DOCKER_BUILDKIT: "1" + DOCKERHUB_USERNAME: "vllmbot" + diff --git a/docker/Dockerfile b/docker/Dockerfile index e61021b6eeb85..fd965b7f7ae0e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -57,6 +57,9 @@ ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} # PyTorch provides its own indexes for standard and nightly builds ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl +# Flag to install PyTorch nightly instead of stable +ARG USE_TORCH_NIGHTLY=false + # PIP supports multiple authentication schemes, including keyring # By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to # disabled by default, we allow third-party to use keyring authentication for @@ -128,6 +131,7 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # Install PyTorch and core CUDA dependencies # This is ~2GB and rarely changes ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY WORKDIR /workspace @@ -135,8 +139,15 @@ WORKDIR /workspace COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} # CUDA arch list used by torch # Explicitly set the list to avoid issues with torch 2.2 @@ -152,6 +163,7 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -164,8 +176,15 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} WORKDIR /workspace @@ -285,6 +304,7 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -297,8 +317,15 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} WORKDIR /workspace @@ -347,6 +374,7 @@ FROM base AS dev ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -361,8 +389,15 @@ COPY requirements/lint.txt requirements/lint.txt COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} #################### DEV IMAGE #################### #################### vLLM installation IMAGE #################### # image with vLLM installed @@ -456,11 +491,19 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # Install PyTorch and core CUDA dependencies # This is ~2GB and rarely changes ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY COPY requirements/common.txt /tmp/common.txt COPY requirements/cuda.txt /tmp/requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system -r /tmp/requirements-cuda.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \ + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} && \ rm /tmp/requirements-cuda.txt /tmp/common.txt # Install FlashInfer pre-compiled kernel cache and binaries @@ -512,12 +555,20 @@ ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER +ARG USE_TORCH_NIGHTLY # Install vllm wheel first, so that torch etc will be installed. RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system dist/*.whl --verbose \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ @@ -538,8 +589,15 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH # Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \ --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system ep_kernels/dist/*.whl --verbose \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} # CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will # return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers @@ -564,6 +622,7 @@ ARG PYTHON_VERSION ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -581,8 +640,15 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ RUN --mount=type=cache,target=/root/.cache/uv \ CUDA_MAJOR="${CUDA_VERSION%%.*}"; \ if [ "$CUDA_MAJOR" -ge 12 ]; then \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system -r requirements/dev.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}; \ fi # install development dependencies (for testing)