Merge 86323206761d09de969a6dc1f0e90b8b07e9dd97 into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

This commit is contained in:
Andrey Talman 2025-12-25 00:06:40 +00:00 committed by GitHub
commit 767510811d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 160 additions and 24 deletions

View File

@ -196,3 +196,4 @@ steps:
env:
DOCKER_BUILDKIT: "1"
DOCKERHUB_USERNAME: "vllmbot"

View File

@ -54,7 +54,11 @@ ARG PIP_EXTRA_INDEX_URL
ARG UV_INDEX_URL=${PIP_INDEX_URL}
ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
# Flag to install PyTorch nightly instead of stable
ARG USE_TORCH_NIGHTLY=false
# PyTorch provides its own indexes for standard and nightly builds
# This will be set conditionally based on USE_TORCH_NIGHTLY in the base stage
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
# PIP supports multiple authentication schemes, including keyring
@ -128,15 +132,40 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# Install PyTorch and core CUDA dependencies
# This is ~2GB and rarely changes
ARG PYTORCH_CUDA_INDEX_BASE_URL
ARG USE_TORCH_NIGHTLY
# Set PyTorch index URL based on USE_TORCH_NIGHTLY
# We compute the index URL once and reuse it across all stages
RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
PYTORCH_SUFFIX="/nightly"; \
else \
PYTORCH_SUFFIX=""; \
fi && \
echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}${PYTORCH_SUFFIX}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment && \
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \
else \
echo "PRERELEASE_FLAG=" >> /etc/environment; \
fi
ENV PYTORCH_INDEX=""
ENV PRERELEASE_FLAG=""
RUN . /etc/environment && echo "PYTORCH_INDEX=${PYTORCH_INDEX}" && echo "PRERELEASE_FLAG=${PRERELEASE_FLAG}"
WORKDIR /workspace
# install build and runtime dependencies
COPY requirements/common.txt requirements/common.txt
COPY requirements/cuda.txt requirements/cuda.txt
COPY use_existing_torch.py use_existing_torch.py
RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \
python3 use_existing_torch.py; \
fi && \
uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
# CUDA arch list used by torch
# Explicitly set the list to avoid issues with torch 2.2
@ -151,10 +180,10 @@ ARG TARGETPLATFORM
ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
# install build dependencies
COPY requirements/build.txt requirements/build.txt
COPY use_existing_torch.py use_existing_torch.py
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@ -164,8 +193,13 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \
python3 use_existing_torch.py; \
fi && \
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
WORKDIR /workspace
@ -290,10 +324,10 @@ ARG TARGETPLATFORM
ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
# install build dependencies
COPY requirements/build.txt requirements/build.txt
COPY use_existing_torch.py use_existing_torch.py
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@ -303,8 +337,13 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \
python3 use_existing_torch.py; \
fi && \
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
WORKDIR /workspace
@ -352,7 +391,6 @@ FROM base AS dev
ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@ -367,8 +405,9 @@ COPY requirements/lint.txt requirements/lint.txt
COPY requirements/test.txt requirements/test.txt
COPY requirements/dev.txt requirements/dev.txt
RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
#################### DEV IMAGE ####################
#################### vLLM installation IMAGE ####################
# image with vLLM installed
@ -462,12 +501,37 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# Install PyTorch and core CUDA dependencies
# This is ~2GB and rarely changes
ARG PYTORCH_CUDA_INDEX_BASE_URL
ARG USE_TORCH_NIGHTLY
# Set PyTorch index URL based on USE_TORCH_NIGHTLY
# We compute the index URL once and reuse it across all stages
RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
PYTORCH_SUFFIX="/nightly"; \
else \
PYTORCH_SUFFIX=""; \
fi && \
echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}${PYTORCH_SUFFIX}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment && \
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \
else \
echo "PRERELEASE_FLAG=" >> /etc/environment; \
fi
ENV PYTORCH_INDEX=""
ENV PRERELEASE_FLAG=""
COPY requirements/common.txt /tmp/common.txt
COPY requirements/cuda.txt /tmp/requirements-cuda.txt
COPY use_existing_torch.py /tmp/use_existing_torch.py
RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \
cd /tmp && python3 use_existing_torch.py; \
fi && \
uv pip install --system -r /tmp/requirements-cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
rm /tmp/requirements-cuda.txt /tmp/common.txt
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} && \
rm /tmp/requirements-cuda.txt /tmp/common.txt /tmp/use_existing_torch.py
# Install FlashInfer pre-compiled kernel cache and binaries
# This is ~1.1GB and only changes when FlashInfer version bumps
@ -518,12 +582,14 @@ ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
ARG USE_TORCH_NIGHTLY
# Install vllm wheel first, so that torch etc will be installed.
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
--mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
uv pip install --system dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
@ -544,8 +610,9 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
# Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage
RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
--mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
uv pip install --system ep_kernels/dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
# CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
# return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers
@ -569,7 +636,6 @@ ARG PYTHON_VERSION
ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@ -587,8 +653,9 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
RUN --mount=type=cache,target=/root/.cache/uv \
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
if [ "$CUDA_MAJOR" -ge 12 ]; then \
. /etc/environment && \
uv pip install --system -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}; \
fi
# install development dependencies (for testing)

View File

@ -2,17 +2,85 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import glob
import os
import re
# Collect all requirement files to process.
# glob.glob already returns a list, so no extra list() wrapper is needed.
files_to_process = glob.glob("requirements/*.txt")
# Also rewrite pyproject.toml when the script is run from the repo root.
if os.path.exists("pyproject.toml"):
    files_to_process.append("pyproject.toml")
# Torch-related package names whose version pins we want to remove.
TORCH_PACKAGES = ['torch', 'torchaudio', 'torchvision', 'triton']


def unpin_torch_dependency(line):
    """Strip the version constraint from a torch-related requirement line.

    Args:
        line: One raw line from a requirements file (trailing newline kept).

    Returns:
        The line unchanged when it is blank, a full-line comment, a
        non-torch requirement, or a torch requirement without a version
        constraint. Otherwise the bare package name (original spelling
        and any extras preserved), with an inline comment re-attached,
        terminated by a newline.
    """
    line_stripped = line.strip()
    # Blank lines and full-line comments pass through untouched.
    if not line_stripped or line_stripped.startswith('#'):
        return line

    for pkg in TORCH_PACKAGES:
        # Require an exact package-name match: the name must be followed by
        # end-of-line, whitespace, an extras bracket, or a version operator.
        # A bare prefix test would also unpin unrelated packages such as
        # "torchao" or "tritonclient".
        if not re.match(rf'{pkg}(?=$|[\s\[=<>!~])', line_stripped, re.IGNORECASE):
            continue
        # Split off an inline comment so it can be re-attached after unpinning.
        comment = ''
        pkg_and_version = line
        if '#' in line:
            pkg_and_version, comment = line.split('#', 1)
            comment = ' #' + comment.rstrip('\n')
        # Only rewrite lines that actually carry a version constraint
        # (any of ==, >=, <=, >, <, !=, ~=).
        if re.search(r'[=<>!~]', pkg_and_version):
            # Keep the original capitalization (and any extras) by slicing
            # the name out of the line itself rather than using `pkg`.
            tokens = line_stripped.split()
            orig_pkg = tokens[0] if tokens else pkg
            orig_pkg = re.split(r'[=<>!~]', orig_pkg)[0]
            result = f"{orig_pkg}{comment}\n" if comment else f"{orig_pkg}\n"
            print(f" unpinned: {line.strip()} -> {result.strip()}")
            return result
        # Matched a torch package but found no pin: nothing to change.
        return line

    return line
# Rewrite each collected file in place, unpinning torch dependencies.
# (The diff rendering had interleaved the old delete-torch-lines loop with
# the new unpinning loop; this is the single coherent new-version loop.)
for file in files_to_process:
    # Tolerate a missing file (e.g. pyproject.toml outside the repo root).
    if not os.path.exists(file):
        print(f">>> skipping {file} (does not exist)")
        continue

    print(f">>> cleaning {file}")
    try:
        with open(file) as f:
            lines = f.readlines()
    except Exception as e:
        print(f"!!! error reading {file}: {e}")
        continue

    # Only rewrite the file when it mentions a torch package at all.
    has_torch = any(any(pkg in line.lower() for pkg in TORCH_PACKAGES) for line in lines)
    if has_torch:
        print("unpinning torch dependencies:")
        try:
            with open(file, "w") as f:
                for line in lines:
                    f.write(unpin_torch_dependency(line))
        except Exception as e:
            print(f"!!! error writing {file}: {e}")
            continue
    else:
        print(" (no torch dependencies found)")
    print(f"<<< done cleaning {file}\n")