From 0b9c701495e220038a0ef4e26037fa5edc7a6993 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 13:53:30 -0800 Subject: [PATCH 1/4] Adopt Dockerfile to build nightly version --- .buildkite/release-pipeline.yaml | 66 +++++++++++++++++++++++++ docker/Dockerfile | 82 ++++++++++++++++++++++++++++---- 2 files changed, 140 insertions(+), 8 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index a9d51557bd9bb..2477e60da4c16 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -196,3 +196,69 @@ steps: env: DOCKER_BUILDKIT: "1" DOCKERHUB_USERNAME: "vllmbot" + + # Build nightly torch Docker images (x86) + - label: "Build nightly torch image (x86)" + depends_on: ~ + id: build-nightly-torch-image-x86 + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 --target vllm-openai --progress plain -f docker/Dockerfile ." 
+ - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" + + # Build nightly torch Docker images (arm64) + - label: "Build nightly torch image (arm64)" + depends_on: ~ + id: build-nightly-torch-image-arm64 + if: build.env("NIGHTLY") == "1" + agents: + queue: arm64_cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --target vllm-openai --progress plain -f docker/Dockerfile ." + - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" + + # Create multi-arch manifest for nightly torch images + - label: "Create nightly torch multi-arch manifest" + depends_on: + - build-nightly-torch-image-x86 + - build-nightly-torch-image-arm64 + id: create-nightly-torch-multi-arch-manifest + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --amend" + - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly" + + # Publish nightly torch images to DockerHub + - label: "Publish nightly torch images to DockerHub" + depends_on: + - 
create-nightly-torch-multi-arch-manifest + if: build.env("NIGHTLY") == "1" + agents: + queue: cpu_queue_postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" + - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" + - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-x86_64" + - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 vllm/vllm-openai:torch-nightly-aarch64" + - "docker push vllm/vllm-openai:torch-nightly-x86_64" + - "docker push vllm/vllm-openai:torch-nightly-aarch64" + - "docker manifest create vllm/vllm-openai:torch-nightly vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" + - "docker manifest create vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" + - "docker manifest push vllm/vllm-openai:torch-nightly" + - "docker manifest push vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT" + plugins: + - docker-login#v3.0.0: + username: vllmbot + password-env: DOCKERHUB_TOKEN + env: + DOCKER_BUILDKIT: "1" + DOCKERHUB_USERNAME: "vllmbot" + diff --git a/docker/Dockerfile b/docker/Dockerfile index e61021b6eeb85..fd965b7f7ae0e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -57,6 +57,9 @@ ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} # PyTorch provides its own indexes for standard and nightly builds ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl +# Flag to install PyTorch nightly instead of stable +ARG USE_TORCH_NIGHTLY=false + # PIP supports multiple authentication schemes, including keyring # By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to # 
disabled by default, we allow third-party to use keyring authentication for @@ -128,6 +131,7 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # Install PyTorch and core CUDA dependencies # This is ~2GB and rarely changes ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY WORKDIR /workspace @@ -135,8 +139,15 @@ WORKDIR /workspace COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} # CUDA arch list used by torch # Explicitly set the list to avoid issues with torch 2.2 @@ -152,6 +163,7 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -164,8 +176,15 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} WORKDIR /workspace @@ -285,6 +304,7 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -297,8 +317,15 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} WORKDIR /workspace @@ -347,6 +374,7 @@ FROM base AS dev ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -361,8 +389,15 @@ COPY requirements/lint.txt requirements/lint.txt COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} #################### DEV IMAGE #################### #################### vLLM installation IMAGE #################### # image with vLLM installed @@ -456,11 +491,19 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # Install PyTorch and core CUDA dependencies # This is ~2GB and rarely changes ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY COPY requirements/common.txt /tmp/common.txt COPY requirements/cuda.txt /tmp/requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system -r /tmp/requirements-cuda.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \ + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} && \ rm /tmp/requirements-cuda.txt /tmp/common.txt # Install FlashInfer pre-compiled kernel cache and binaries @@ -512,12 +555,20 @@ ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER +ARG USE_TORCH_NIGHTLY # Install vllm wheel first, so that torch etc will be installed. 
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system dist/*.whl --verbose \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ @@ -538,8 +589,15 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH # Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \ --mount=type=cache,target=/root/.cache/uv \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system ep_kernels/dist/*.whl --verbose \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} # CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will # return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers @@ -564,6 +622,7 @@ ARG PYTHON_VERSION ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -581,8 +640,15 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ RUN --mount=type=cache,target=/root/.cache/uv \ CUDA_MAJOR="${CUDA_VERSION%%.*}"; \ if [ "$CUDA_MAJOR" -ge 12 ]; then \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG="--prerelease=allow"; \ + else \ + PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ + PRERELEASE_FLAG=""; \ + fi && \ uv pip install --system -r requirements/dev.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}; \ fi # install development dependencies (for testing) From 69b9a839855019248ba0eee1e4cc49d18e14b6c8 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 13:59:34 -0800 Subject: [PATCH 2/4] refactor --- docker/Dockerfile | 132 ++++++++++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 56 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index fd965b7f7ae0e..a5c5e94092a3f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -133,19 +133,25 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. 
-f1,2)/compat/ ARG PYTORCH_CUDA_INDEX_BASE_URL ARG USE_TORCH_NIGHTLY +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ + else \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" +RUN . /etc/environment && echo "PYTORCH_INDEX=${PYTORCH_INDEX}" && echo "PRERELEASE_FLAG=${PRERELEASE_FLAG}" + WORKDIR /workspace # install build and runtime dependencies COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -175,14 +181,19 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + +RUN --mount=type=cache,target=/root/.cache/uv \ + . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -316,14 +327,19 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + +RUN --mount=type=cache,target=/root/.cache/uv \ + . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -383,19 +399,24 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ + else \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + # Install libnuma-dev, required by fastsafetensors (fixes #20384) RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/* COPY requirements/lint.txt requirements/lint.txt COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . 
/etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} #################### DEV IMAGE #################### @@ -492,16 +513,22 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # This is ~2GB and rarely changes ARG PYTORCH_CUDA_INDEX_BASE_URL ARG USE_TORCH_NIGHTLY + +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ + else \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + COPY requirements/common.txt /tmp/common.txt COPY requirements/cuda.txt /tmp/requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --system -r /tmp/requirements-cuda.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} && \ rm /tmp/requirements-cuda.txt /tmp/common.txt @@ -560,13 +587,7 @@ ARG USE_TORCH_NIGHTLY # Install vllm wheel first, so that torch etc will be installed. 
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --system dist/*.whl --verbose \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -589,13 +610,7 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH # Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \ --mount=type=cache,target=/root/.cache/uv \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --system ep_kernels/dist/*.whl --verbose \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -631,6 +646,17 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy +# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ + else \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')" >> /etc/environment; \ + echo "PRERELEASE_FLAG=" >> /etc/environment; \ + fi +ENV PYTORCH_INDEX="" +ENV PRERELEASE_FLAG="" + RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ && apt-get update -y \ @@ -640,13 +666,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ RUN --mount=type=cache,target=/root/.cache/uv \ CUDA_MAJOR="${CUDA_VERSION%%.*}"; \ if [ "$CUDA_MAJOR" -ge 12 ]; then \ - if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG="--prerelease=allow"; \ - else \ - PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \ - PRERELEASE_FLAG=""; \ - fi && \ + . /etc/environment && \ uv pip install --system -r requirements/dev.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}; \ fi From 06df43c720f319c1ec555999eaa560d78d98e6ba Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 14:05:27 -0800 Subject: [PATCH 3/4] refactor --- docker/Dockerfile | 83 +++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 61 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a5c5e94092a3f..6734a75ad8143 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -54,12 +54,13 @@ ARG PIP_EXTRA_INDEX_URL ARG UV_INDEX_URL=${PIP_INDEX_URL} ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} -# PyTorch provides its own indexes for standard and nightly builds -ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl - # Flag to install PyTorch nightly instead of stable ARG USE_TORCH_NIGHTLY=false +# PyTorch provides its own indexes for standard and nightly builds +# This will be set conditionally based on USE_TORCH_NIGHTLY in the base stage +ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl + # 
PIP supports multiple authentication schemes, including keyring # By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to # disabled by default, we allow third-party to use keyring authentication for @@ -133,14 +134,20 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ ARG PYTORCH_CUDA_INDEX_BASE_URL ARG USE_TORCH_NIGHTLY -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +# Set PyTorch index URL based on USE_TORCH_NIGHTLY +# We compute the index URL once and reuse it across all stages RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + PYTORCH_SUFFIX="/nightly"; \ + else \ + PYTORCH_SUFFIX=""; \ + fi && \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}${PYTORCH_SUFFIX}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment && \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ echo "PRERELEASE_FLAG=" >> /etc/environment; \ fi + ENV PYTORCH_INDEX="" ENV PRERELEASE_FLAG="" RUN . 
/etc/environment && echo "PYTORCH_INDEX=${PYTORCH_INDEX}" && echo "PRERELEASE_FLAG=${PRERELEASE_FLAG}" @@ -168,8 +175,6 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -181,17 +186,6 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY -RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ - else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=" >> /etc/environment; \ - fi -ENV PYTORCH_INDEX="" -ENV PRERELEASE_FLAG="" - RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ @@ -314,8 +308,6 @@ ARG TARGETPLATFORM ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG USE_TORCH_NIGHTLY # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -327,17 +319,6 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY -RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ - else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=" >> /etc/environment; \ - fi -ENV PYTORCH_INDEX="" -ENV PRERELEASE_FLAG="" - RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ @@ -389,8 +370,6 @@ FROM base AS dev ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -399,17 +378,6 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY -RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ - else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=" >> /etc/environment; \ - fi -ENV PYTORCH_INDEX="" -ENV PRERELEASE_FLAG="" - # Install libnuma-dev, required by fastsafetensors (fixes #20384) RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/* COPY requirements/lint.txt requirements/lint.txt @@ -514,14 +482,20 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. 
-f1,2)/compat/ ARG PYTORCH_CUDA_INDEX_BASE_URL ARG USE_TORCH_NIGHTLY -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY +# Set PyTorch index URL based on USE_TORCH_NIGHTLY +# We compute the index URL once and reuse it across all stages RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ + PYTORCH_SUFFIX="/nightly"; \ + else \ + PYTORCH_SUFFIX=""; \ + fi && \ + echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}${PYTORCH_SUFFIX}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment && \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ echo "PRERELEASE_FLAG=" >> /etc/environment; \ fi + ENV PYTORCH_INDEX="" ENV PRERELEASE_FLAG="" @@ -636,8 +610,6 @@ ARG PYTHON_VERSION ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG USE_TORCH_NIGHTLY # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -646,17 +618,6 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" # Use copy mode to avoid hardlink failures with Docker cache mounts ENV UV_LINK_MODE=copy -# Set PyTorch index URL and prerelease flag based on USE_TORCH_NIGHTLY -RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \ - else \ - echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.')" >> /etc/environment; \ - echo "PRERELEASE_FLAG=" >> /etc/environment; \ - fi -ENV PYTORCH_INDEX="" -ENV PRERELEASE_FLAG="" - RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ && apt-get update -y \ From 86323206761d09de969a6dc1f0e90b8b07e9dd97 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 23 Dec 2025 14:22:28 -0800 Subject: [PATCH 4/4] more_changes --- .buildkite/release-pipeline.yaml | 65 ----------------------- docker/Dockerfile | 22 +++++++- use_existing_torch.py | 90 ++++++++++++++++++++++++++++---- 3 files changed, 100 insertions(+), 77 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 2477e60da4c16..c04bfac6644ca 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -197,68 +197,3 @@ steps: DOCKER_BUILDKIT: "1" DOCKERHUB_USERNAME: "vllmbot" - # Build nightly torch Docker images (x86) - - label: "Build nightly torch image (x86)" - depends_on: ~ - id: build-nightly-torch-image-x86 - if: build.env("NIGHTLY") == "1" - agents: - queue: cpu_queue_postmerge - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 --target vllm-openai --progress plain -f docker/Dockerfile ." 
- - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" - - # Build nightly torch Docker images (arm64) - - label: "Build nightly torch image (arm64)" - depends_on: ~ - id: build-nightly-torch-image-arm64 - if: build.env("NIGHTLY") == "1" - agents: - queue: arm64_cpu_queue_postmerge - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --target vllm-openai --progress plain -f docker/Dockerfile ." - - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" - - # Create multi-arch manifest for nightly torch images - - label: "Create nightly torch multi-arch manifest" - depends_on: - - build-nightly-torch-image-x86 - - build-nightly-torch-image-arm64 - id: create-nightly-torch-multi-arch-manifest - if: build.env("NIGHTLY") == "1" - agents: - queue: cpu_queue_postmerge - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --amend" - - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly" - - # Publish nightly torch images to DockerHub - - label: "Publish nightly torch images to DockerHub" - depends_on: - - 
create-nightly-torch-multi-arch-manifest - if: build.env("NIGHTLY") == "1" - agents: - queue: cpu_queue_postmerge - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64" - - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64" - - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-x86_64" - - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 vllm/vllm-openai:torch-nightly-aarch64" - - "docker push vllm/vllm-openai:torch-nightly-x86_64" - - "docker push vllm/vllm-openai:torch-nightly-aarch64" - - "docker manifest create vllm/vllm-openai:torch-nightly vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" - - "docker manifest create vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend" - - "docker manifest push vllm/vllm-openai:torch-nightly" - - "docker manifest push vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT" - plugins: - - docker-login#v3.0.0: - username: vllmbot - password-env: DOCKERHUB_TOKEN - env: - DOCKER_BUILDKIT: "1" - DOCKERHUB_USERNAME: "vllmbot" - diff --git a/docker/Dockerfile b/docker/Dockerfile index 6734a75ad8143..cdfca180c809d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -157,8 +157,13 @@ WORKDIR /workspace # install build and runtime dependencies COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt +COPY use_existing_torch.py use_existing_torch.py RUN --mount=type=cache,target=/root/.cache/uv \ . 
/etc/environment && \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \ + python3 use_existing_torch.py; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -178,6 +183,7 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL # install build dependencies COPY requirements/build.txt requirements/build.txt +COPY use_existing_torch.py use_existing_torch.py # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -188,6 +194,10 @@ ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ + if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \ + echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \ + python3 use_existing_torch.py; \ + fi && \ uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} @@ -311,6 +321,7 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL # install build dependencies COPY requirements/build.txt requirements/build.txt +COPY use_existing_torch.py use_existing_torch.py # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -321,6 +332,10 @@ ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ . 
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unpin torch-related dependencies in requirements files.

Used by nightly Docker builds: the PyTorch nightly index supplies
torch/torchaudio/torchvision/triton, so the exact version pins in
requirements/*.txt must be relaxed to bare package names before
``uv pip install`` resolves against the nightly index.
"""

import glob
import os
import re

# Packages whose version pins should be removed (names are kept).
TORCH_PACKAGES = ["torch", "torchaudio", "torchvision", "triton"]

# Match a torch package name only when it is the *whole* requirement name:
# it must be followed by an extras bracket, whitespace, a version operator,
# or the end of the line.  A bare prefix match would also unpin unrelated
# packages such as "tritonclient" or "torch-tensorrt".
_TORCH_PKG_RE = re.compile(
    r"^(?:%s)(?=\[|\s|[=<>!~]|$)" % "|".join(TORCH_PACKAGES),
    re.IGNORECASE,
)

# Version constraint operators: ==, >=, <=, >, <, !=, ~=
_VERSION_OP_RE = re.compile(r"[=<>!~]")


def unpin_torch_dependency(line):
    """Return *line* with any torch-package version pin removed.

    Empty lines, full-line comments, and lines for other packages are
    returned unchanged.  An inline comment on an unpinned line is kept.
    """
    line_stripped = line.strip()

    # Empty lines and full-line comments pass through untouched.
    if not line_stripped or line_stripped.startswith("#"):
        return line

    # Only lines whose requirement name is exactly a torch package qualify.
    if not _TORCH_PKG_RE.match(line_stripped):
        return line

    # Split off an inline comment so it can be re-attached afterwards.
    if "#" in line:
        pkg_and_version, comment = line.split("#", 1)
        comment = " #" + comment.rstrip("\n")
    else:
        pkg_and_version, comment = line, ""

    # No version constraint present -> nothing to unpin.
    if not _VERSION_OP_RE.search(pkg_and_version):
        return line

    # Keep the original spelling of the name (plus extras), drop the pin.
    # NOTE(review): anything after whitespace (e.g. an environment marker
    # such as "; python_version < '3.12'") is dropped along with the pin,
    # matching the previous behaviour — confirm this is intended.
    orig_pkg = _VERSION_OP_RE.split(line_stripped.split()[0])[0]
    result = f"{orig_pkg}{comment}\n" if comment else f"{orig_pkg}\n"
    print(f"  unpinned: {line.strip()} -> {result.strip()}")
    return result


def main():
    """Rewrite requirements/*.txt (and pyproject.toml) in place."""
    files_to_process = list(glob.glob("requirements/*.txt"))
    # pyproject.toml pins live inside quoted TOML strings, so unpinning is
    # best-effort there; it is processed for parity with prior behaviour.
    if os.path.exists("pyproject.toml"):
        files_to_process.append("pyproject.toml")

    for file in files_to_process:
        if not os.path.exists(file):
            print(f">>> skipping {file} (does not exist)")
            continue

        print(f">>> cleaning {file}")
        try:
            with open(file) as f:
                lines = f.readlines()
        except OSError as e:
            print(f"!!! error reading {file}: {e}")
            continue

        # Quick substring filter; it may trigger on e.g. "pytorch" inside a
        # comment, which is harmless because unpin_torch_dependency()
        # re-validates every line it touches.
        has_torch = any(
            pkg in line.lower() for line in lines for pkg in TORCH_PACKAGES
        )

        if has_torch:
            print("unpinning torch dependencies:")
            try:
                with open(file, "w") as f:
                    for line in lines:
                        f.write(unpin_torch_dependency(line))
            except OSError as e:
                print(f"!!! error writing {file}: {e}")
                continue
        else:
            print("  (no torch dependencies found)")
        print(f"<<< done cleaning {file}\n")


if __name__ == "__main__":
    main()