From 0b9c701495e220038a0ef4e26037fa5edc7a6993 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 13:53:30 -0800 Subject: [PATCH 1/4] Adopt Dockerfile to build nightly version --- .buildkite/release-pipeline.yaml | 66 +++++++++++++++++++++++++ docker/Dockerfile | 82 ++++++++++++++++++++++++++++---- 2 files changed, 140 insertions(+), 8 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index a9d51557bd9bb..2477e60da4c16 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -196,3 +196,69 @@ steps: env: DOCKER_BUILDKIT: "1" DOCKERHUB_USERNAME: "vllmbot" + + # Build nightly torch Docker images (x86) + - label: "Build nightly torch image (x86)" + depends_on: ~ + id: build-nightly-torch-image-x86 + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 --target vllm-openai --progress plain -f docker/Dockerfile ." 
+ - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" + + # Build nightly torch Docker images (arm64) + - label: "Build nightly torch image (arm64)" + depends_on: ~ + id: build-nightly-torch-image-arm64 + if: build.env("NIGHTLY") == "1" + agents: + queue: arm64_cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --target vllm-openai --progress plain -f docker/Dockerfile ." + - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" + + # Create multi-arch manifest for nightly torch images + - label: "Create nightly torch multi-arch manifest" + depends_on: + - build-nightly-torch-image-x86 + - build-nightly-torch-image-arm64 + id: create-nightly-torch-multi-arch-manifest + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --amend" + - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly" + + # Publish nightly torch images to DockerHub + - label: "Publish nightly torch images to DockerHub" + depends_on: + - 
create-nightly-torch-multi-arch-manifest + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" + - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" + - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-x86_64" + - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 vllm/vllm-openai:torch-nightly-aarch64" + - "docker push vllm/vllm-openai:torch-nightly-x86_64" + - "docker push vllm/vllm-openai:torch-nightly-aarch64" + - "docker manifest create vllm/vllm-openai:torch-nightly vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" + - "docker manifest create vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" + - "docker manifest push vllm/vllm-openai:torch-nightly" + - "docker manifest push vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT" + plugins: + - docker-login#v3.0.0: + username: vllmbot + password-env: DOCKERHUB_TOKEN + env: + DOCKER_BUILDKIT: "1" + DOCKERHUB_USERNAME: "vllmbot" + diff --git a/docker/Dockerfile b/docker/Dockerfile index e61021b6eeb85..fd965b7f7ae0e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -57,6 +57,9 @@ ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} # PyTorch provides its own indexes for standard and nightly builds ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl +# Flag to install PyTorch nightly instead of stable +ARG USE_TORCH_NIGHTLY=false + # PIP supports multiple authentication schemes, including keyring # By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to # 
disabled by default, we allow third-party to use keyring authentication for @@ -128,6 +131,7 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # Install PyTorch and core CUDA dependencies # This is ~2GB and rarely changes ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY WORKDIR /workspace @@ -135,8 +139,15 @@ WORKDIR /workspace COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} # CUDA arch list used by torch # Explicitly set the list to avoid issues with torch 2.2 @@ -152,6 +163,7 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -164,8 +176,15 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} WORKDIR /workspace @@ -285,6 +304,7 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -297,8 +317,15 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} WORKDIR /workspace @@ -347,6 +374,7 @@ FROM base AS dev ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -361,8 +389,15 @@ COPY requirements/lint.txt requirements/lint.txt COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} #################### DEV IMAGE #################### #################### vLLM installation IMAGE #################### # image with vLLM installed @@ -456,11 +491,19 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # Install PyTorch and core CUDA dependencies # This is ~2GB and rarely changes ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY COPY requirements/common.txt /tmp/common.txt COPY requirements/cuda.txt /tmp/requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system -r /tmp/requirements-cuda.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \ + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} && \ rm /tmp/requirements-cuda.txt /tmp/common.txt # Install FlashInfer pre-compiled kernel cache and binaries @@ -512,12 +555,20 @@ ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER +ARG USE_TORCH_NIGHTLY # Install vllm wheel first, so that torch etc will be installed. 
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system dist/*.whl --verbose \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ @@ -538,8 +589,15 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH # Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \ --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system ep_kernels/dist/*.whl --verbose \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} # CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will # return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers @@ -564,6 +622,7 @@ ARG PYTHON_VERSION ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -581,8 +640,15 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ RUN --mount=type=cache,target=/root/.cache/uv \ CUDA_MAJOR="${CUDA_VERSION%%.*}"; \ if [ "$CUDA_MAJOR" -ge 12 ]; then \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system -r requirements/dev.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}; \ fi # install development dependencies (for testing) From 69b9a839855019248ba0eee1e4cc49d18e14b6c8 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 13:59:34 -0800 Subject: [PATCH 2/4] refactor --- docker/Dockerfile | 132 ++++++++++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 56 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index fd965b7f7ae0e..a5c5e94092a3f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -133,19 +133,25 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. 
-f1,2)/compat/ ARG PYTORCH_CUDA_INDEX_BASE_URL ARG USE_TORCH_NIGHTLY +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ + else \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" +RUN . /etc/environment && echo "PYTORCH_INDEX=${PYTORCH_INDEX}" && echo "PRERELEASE_FLAG=${PRERELEASE_FLAG}" + WORKDIR /workspace # install build and runtime dependencies COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -175,14 +181,19 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + +RUN --mount=type=cache,target=/root/.cache/uv \ + . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -316,14 +327,19 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + +RUN --mount=type=cache,target=/root/.cache/uv \ + . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -383,19 +399,24 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ + else \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + # Install libnuma-dev, required by fastsafetensors (fixes #20384) RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/* COPY requirements/lint.txt requirements/lint.txt COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . 
/etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} #################### DEV IMAGE #################### @@ -492,16 +513,22 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # This is ~2GB and rarely changes ARG PYTORCH_CUDA_INDEX_BASE_URL ARG USE_TORCH_NIGHTLY + +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ + else \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + COPY requirements/common.txt /tmp/common.txt COPY requirements/cuda.txt /tmp/requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --system -r /tmp/requirements-cuda.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} && \ rm /tmp/requirements-cuda.txt /tmp/common.txt @@ -560,13 +587,7 @@ ARG USE_TORCH_NIGHTLY # Install vllm wheel first, so that torch etc will be installed. 
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --system dist/*.whl --verbose \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -589,13 +610,7 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH # Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \ --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --system ep_kernels/dist/*.whl --verbose \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -631,6 +646,17 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ + else \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ && apt-get update -y \ @@ -640,13 +666,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ RUN --mount=type=cache,target=/root/.cache/uv \ CUDA_MAJOR="${CUDA_VERSION%%.*}"; \ if [ "$CUDA_MAJOR" -ge 12 ]; then \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --system -r requirements/dev.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}; \ fi From 06df43c720f319c1ec555999eaa560d78d98e6ba Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 14:05:27 -0800 Subject: [PATCH 3/4] refactor --- docker/Dockerfile | 83 +++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 61 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a5c5e94092a3f..6734a75ad8143 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -54,12 +54,13 @@ ARG PIP_EXTRA_INDEX_URL ARG UV_INDEX_URL=${PIP_INDEX_URL} ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} -# PyTorch provides its own indexes for standard and nightly builds -ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl - # Flag to install PyTorch nightly instead of stable ARG USE_TORCH_NIGHTLY=false +# PyTorch provides its own indexes for standard and nightly builds +# This will be set conditionally based on USE_TORCH_NIGHTLY in the base stage +ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl + # 
PIP supports multiple authentication schemes, including keyring # By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to # disabled by default, we allow third-party to use keyring authentication for @@ -133,14 +134,20 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ ARG PYTORCH_CUDA_INDEX_BASE_URL ARG USE_TORCH_NIGHTLY -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +# Set PyTorch index URL based on USE_TORCH_NIGHTLY +# We compute the index URL once and reuse it across all stages RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + PYTORCH_SUFFIX="/nightly"; \ + else \ + PYTORCH_SUFFIX=""; \ + fi && \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}${PYTORCH_SUFFIX}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment && \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ echo "PRERELEASE_FLAG=" >> /etc/environment; \ fi + ENV PYTORCH_INDEX="" ENV PRERELEASE_FLAG="" RUN . 
/etc/environment && echo "PYTORCH_INDEX=${PYTORCH_INDEX}" && echo "PRERELEASE_FLAG=${PRERELEASE_FLAG}" @@ -168,8 +175,6 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -181,17 +186,6 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY -RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ - else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=" >> /etc/environment; \ - fi -ENV PYTORCH_INDEX="" -ENV PRERELEASE_FLAG="" - RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ @@ -314,8 +308,6 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -327,17 +319,6 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY -RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ - else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=" >> /etc/environment; \ - fi -ENV PYTORCH_INDEX="" -ENV PRERELEASE_FLAG="" - RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ @@ -389,8 +370,6 @@ FROM base AS dev ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -399,17 +378,6 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY -RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ - else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=" >> /etc/environment; \ - fi -ENV PYTORCH_INDEX="" -ENV PRERELEASE_FLAG="" - # Install libnuma-dev, required by fastsafetensors (fixes #20384) RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/* COPY requirements/lint.txt requirements/lint.txt @@ -514,14 +482,20 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. 
-f1,2)/compat/ ARG PYTORCH_CUDA_INDEX_BASE_URL ARG USE_TORCH_NIGHTLY -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +# Set PyTorch index URL based on USE_TORCH_NIGHTLY +# We compute the index URL once and reuse it across all stages RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + PYTORCH_SUFFIX="/nightly"; \ + else \ + PYTORCH_SUFFIX=""; \ + fi && \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}${PYTORCH_SUFFIX}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment && \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ echo "PRERELEASE_FLAG=" >> /etc/environment; \ fi + ENV PYTORCH_INDEX="" ENV PRERELEASE_FLAG="" @@ -636,8 +610,6 @@ ARG PYTHON_VERSION ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -646,17 +618,6 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY -RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ - else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=" >> /etc/environment; \ - fi -ENV PYTORCH_INDEX="" -ENV PRERELEASE_FLAG="" - RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ && apt-get update -y \ From 86323206761d09de969a6dc1f0e90b8b07e9dd97 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 14:22:28 -0800 Subject: [PATCH 4/4] more_changes --- .buildkite/release-pipeline.yaml | 65 ----------------------- docker/Dockerfile | 22 +++++++- use_existing_torch.py | 90 ++++++++++++++++++++++++++++---- 3 files changed, 100 insertions(+), 77 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 2477e60da4c16..c04bfac6644ca 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -197,68 +197,3 @@ steps: DOCKER_BUILDKIT: "1" DOCKERHUB_USERNAME: "vllmbot" - # Build nightly torch Docker images (x86) - - label: "Build nightly torch image (x86)" - depends_on: ~ - id: build-nightly-torch-image-x86 - if: build.env("NIGHTLY") == "1" - agents: - queue: cpu_queue_postmerge - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 --target vllm-openai --progress plain -f docker/Dockerfile ." 
- - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" - - # Build nightly torch Docker images (arm64) - - label: "Build nightly torch image (arm64)" - depends_on: ~ - id: build-nightly-torch-image-arm64 - if: build.env("NIGHTLY") == "1" - agents: - queue: arm64_cpu_queue_postmerge - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --target vllm-openai --progress plain -f docker/Dockerfile ." - - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" - - # Create multi-arch manifest for nightly torch images - - label: "Create nightly torch multi-arch manifest" - depends_on: - - build-nightly-torch-image-x86 - - build-nightly-torch-image-arm64 - id: create-nightly-torch-multi-arch-manifest - if: build.env("NIGHTLY") == "1" - agents: - queue: cpu_queue_postmerge - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --amend" - - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly" - - # Publish nightly torch images to DockerHub - - label: "Publish nightly torch images to DockerHub" - depends_on: - - 
create-nightly-torch-multi-arch-manifest - if: build.env("NIGHTLY") == "1" - agents: - queue: cpu_queue_postmerge - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" - - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" - - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-x86_64" - - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 vllm/vllm-openai:torch-nightly-aarch64" - - "docker push vllm/vllm-openai:torch-nightly-x86_64" - - "docker push vllm/vllm-openai:torch-nightly-aarch64" - - "docker manifest create vllm/vllm-openai:torch-nightly vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" - - "docker manifest create vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" - - "docker manifest push vllm/vllm-openai:torch-nightly" - - "docker manifest push vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT" - plugins: - - docker-login#v3.0.0: - username: vllmbot - password-env: DOCKERHUB_TOKEN - env: - DOCKER_BUILDKIT: "1" - DOCKERHUB_USERNAME: "vllmbot" - diff --git a/docker/Dockerfile b/docker/Dockerfile index 6734a75ad8143..cdfca180c809d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -157,8 +157,13 @@ WORKDIR /workspace # install build and runtime dependencies COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt +COPY use_existing_torch.py use_existing_torch.py RUN --mount=type=cache,target=/root/.cache/uv \ . 
/etc/environment && \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \ + python3 use_existing_torch.py; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -178,6 +183,7 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL # install build dependencies COPY requirements/build.txt requirements/build.txt +COPY use_existing_torch.py use_existing_torch.py # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -188,6 +194,10 @@ ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \ + python3 use_existing_torch.py; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -311,6 +321,7 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL # install build dependencies COPY requirements/build.txt requirements/build.txt +COPY use_existing_torch.py use_existing_torch.py # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -321,6 +332,10 @@ ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ . 
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unpin torch-related dependencies in requirements files.

Used by nightly Docker builds: the PyTorch nightly index supplies
torch/torchaudio/torchvision/triton, so the exact version pins in
requirements/*.txt must be relaxed to bare package names before
``uv pip install`` resolves against the nightly index.
"""

import glob
import os
import re

# Packages whose version pins should be removed (names are kept).
TORCH_PACKAGES = ["torch", "torchaudio", "torchvision", "triton"]

# Match a torch package name only when it is the *whole* requirement name:
# it must be followed by an extras bracket, whitespace, a version operator,
# or the end of the line.  A bare prefix match would also unpin unrelated
# packages such as "tritonclient" or "torch-tensorrt".
_TORCH_PKG_RE = re.compile(
    r"^(?:%s)(?=\[|\s|[=<>!~]|$)" % "|".join(TORCH_PACKAGES),
    re.IGNORECASE,
)

# Version constraint operators: ==, >=, <=, >, <, !=, ~=
_VERSION_OP_RE = re.compile(r"[=<>!~]")


def unpin_torch_dependency(line):
    """Return *line* with any torch-package version pin removed.

    Empty lines, full-line comments, and lines for other packages are
    returned unchanged.  An inline comment on an unpinned line is kept.
    """
    line_stripped = line.strip()

    # Empty lines and full-line comments pass through untouched.
    if not line_stripped or line_stripped.startswith("#"):
        return line

    # Only lines whose requirement name is exactly a torch package qualify.
    if not _TORCH_PKG_RE.match(line_stripped):
        return line

    # Split off an inline comment so it can be re-attached afterwards.
    if "#" in line:
        pkg_and_version, comment = line.split("#", 1)
        comment = " #" + comment.rstrip("\n")
    else:
        pkg_and_version, comment = line, ""

    # No version constraint present -> nothing to unpin.
    if not _VERSION_OP_RE.search(pkg_and_version):
        return line

    # Keep the original spelling of the name (plus extras), drop the pin.
    # NOTE(review): anything after whitespace (e.g. an environment marker
    # such as "; python_version < '3.12'") is dropped along with the pin,
    # matching the previous behaviour — confirm this is intended.
    orig_pkg = _VERSION_OP_RE.split(line_stripped.split()[0])[0]
    result = f"{orig_pkg}{comment}\n" if comment else f"{orig_pkg}\n"
    print(f"  unpinned: {line.strip()} -> {result.strip()}")
    return result


def main():
    """Rewrite requirements/*.txt (and pyproject.toml) in place."""
    files_to_process = list(glob.glob("requirements/*.txt"))
    # pyproject.toml pins live inside quoted TOML strings, so unpinning is
    # best-effort there; it is processed for parity with prior behaviour.
    if os.path.exists("pyproject.toml"):
        files_to_process.append("pyproject.toml")

    for file in files_to_process:
        if not os.path.exists(file):
            print(f">>> skipping {file} (does not exist)")
            continue

        print(f">>> cleaning {file}")
        try:
            with open(file) as f:
                lines = f.readlines()
        except OSError as e:
            print(f"!!! error reading {file}: {e}")
            continue

        # Quick substring filter; it may trigger on e.g. "pytorch" inside a
        # comment, which is harmless because unpin_torch_dependency()
        # re-validates every line it touches.
        has_torch = any(
            pkg in line.lower() for line in lines for pkg in TORCH_PACKAGES
        )

        if has_torch:
            print("unpinning torch dependencies:")
            try:
                with open(file, "w") as f:
                    for line in lines:
                        f.write(unpin_torch_dependency(line))
            except OSError as e:
                print(f"!!! error writing {file}: {e}")
                continue
        else:
            print("  (no torch dependencies found)")
        print(f"<<< done cleaning {file}\n")


if __name__ == "__main__":
    main()