Add a USE_TORCH_NIGHTLY build argument to the Docker image.

When it is "true", the PyTorch nightly wheel index and `--prerelease=allow`
are used for every `uv pip install`, and use_existing_torch.py strips the
version pins of torch-family packages from the requirement files first.

diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
index a9d51557bd9bb..c04bfac6644ca 100644
--- a/.buildkite/release-pipeline.yaml
+++ b/.buildkite/release-pipeline.yaml
@@ -196,3 +196,4 @@ steps:
     env:
       DOCKER_BUILDKIT: "1"
       DOCKERHUB_USERNAME: "vllmbot"
+
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 679ffc4a7df5f..c046f1a556e58 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -54,7 +54,11 @@ ARG PIP_EXTRA_INDEX_URL
 ARG UV_INDEX_URL=${PIP_INDEX_URL}
 ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 
+# Flag to install PyTorch nightly instead of stable
+ARG USE_TORCH_NIGHTLY=false
+
 # PyTorch provides its own indexes for standard and nightly builds
+# This will be set conditionally based on USE_TORCH_NIGHTLY in the base stage
 ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
 
 # PIP supports multiple authentication schemes, including keyring
@@ -128,15 +132,40 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 # Install PyTorch and core CUDA dependencies
 # This is ~2GB and rarely changes
 ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG USE_TORCH_NIGHTLY
+
+# Set PyTorch index URL based on USE_TORCH_NIGHTLY
+# We compute the index URL once and reuse it across all stages
+RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
+        PYTORCH_SUFFIX="/nightly"; \
+    else \
+        PYTORCH_SUFFIX=""; \
+    fi && \
+    echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}${PYTORCH_SUFFIX}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment && \
+    if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
+        echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \
+    else \
+        echo "PRERELEASE_FLAG=" >> /etc/environment; \
+    fi
+
+ENV PYTORCH_INDEX=""
+ENV PRERELEASE_FLAG=""
+RUN . /etc/environment && echo "PYTORCH_INDEX=${PYTORCH_INDEX}" && echo "PRERELEASE_FLAG=${PRERELEASE_FLAG}"
 
 WORKDIR /workspace
 
 # install build and runtime dependencies
 COPY requirements/common.txt requirements/common.txt
 COPY requirements/cuda.txt requirements/cuda.txt
+COPY use_existing_torch.py use_existing_torch.py
 RUN --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
+    if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
+        echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \
+        python3 use_existing_torch.py; \
+    fi && \
     uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
-    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+    --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
 
 # CUDA arch list used by torch
 # Explicitly set the list to avoid issues with torch 2.2
@@ -151,10 +180,10 @@ ARG TARGETPLATFORM
 
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
-ARG PYTORCH_CUDA_INDEX_BASE_URL
 
 # install build dependencies
 COPY requirements/build.txt requirements/build.txt
+COPY use_existing_torch.py use_existing_torch.py
 
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
@@ -164,8 +193,13 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_LINK_MODE=copy
 
 RUN --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
+    if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
+        echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \
+        python3 use_existing_torch.py; \
+    fi && \
     uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
-    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+    --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
 
 WORKDIR /workspace
 
@@ -290,10 +324,10 @@ ARG TARGETPLATFORM
 
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
-ARG PYTORCH_CUDA_INDEX_BASE_URL
 
 # install build dependencies
 COPY requirements/build.txt requirements/build.txt
+COPY use_existing_torch.py use_existing_torch.py
 
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
@@ -303,8 +337,13 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_LINK_MODE=copy
 
 RUN --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
+    if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
+        echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \
+        python3 use_existing_torch.py; \
+    fi && \
     uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
-    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+    --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
 
 WORKDIR /workspace
 
@@ -352,7 +391,6 @@ FROM base AS dev
 
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
-ARG PYTORCH_CUDA_INDEX_BASE_URL
 
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
@@ -367,8 +405,9 @@ COPY requirements/lint.txt requirements/lint.txt
 COPY requirements/test.txt requirements/test.txt
 COPY requirements/dev.txt requirements/dev.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
     uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
-    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+    --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
 #################### DEV IMAGE ####################
 #################### vLLM installation IMAGE ####################
 # image with vLLM installed
@@ -462,12 +501,37 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 # Install PyTorch and core CUDA dependencies
 # This is ~2GB and rarely changes
 ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG USE_TORCH_NIGHTLY
+
+# Set PyTorch index URL based on USE_TORCH_NIGHTLY
+# We compute the index URL once and reuse it across all stages
+RUN if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
+        PYTORCH_SUFFIX="/nightly"; \
+    else \
+        PYTORCH_SUFFIX=""; \
+    fi && \
+    echo "PYTORCH_INDEX=${PYTORCH_CUDA_INDEX_BASE_URL}${PYTORCH_SUFFIX}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" >> /etc/environment && \
+    if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
+        echo "PRERELEASE_FLAG=--prerelease=allow" >> /etc/environment; \
+    else \
+        echo "PRERELEASE_FLAG=" >> /etc/environment; \
+    fi
+
+ENV PYTORCH_INDEX=""
+ENV PRERELEASE_FLAG=""
+
 COPY requirements/common.txt /tmp/common.txt
 COPY requirements/cuda.txt /tmp/requirements-cuda.txt
+COPY use_existing_torch.py /tmp/use_existing_torch.py
 RUN --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
+    if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
+        echo ">>> Running use_existing_torch.py to reset torch dependencies for nightly build" && \
+        python3 /tmp/use_existing_torch.py /tmp/common.txt /tmp/requirements-cuda.txt; \
+    fi && \
     uv pip install --system -r /tmp/requirements-cuda.txt \
-    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
-    rm /tmp/requirements-cuda.txt /tmp/common.txt
+    --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} && \
+    rm /tmp/requirements-cuda.txt /tmp/common.txt /tmp/use_existing_torch.py
 
 # Install FlashInfer pre-compiled kernel cache and binaries
 # This is ~1.1GB and only changes when FlashInfer version bumps
@@ -518,12 +582,14 @@ ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
 ARG PYTORCH_CUDA_INDEX_BASE_URL
 ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
+ARG USE_TORCH_NIGHTLY
 
 # Install vllm wheel first, so that torch etc will be installed.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
     uv pip install --system dist/*.whl --verbose \
-    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+    --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
 
 RUN --mount=type=cache,target=/root/.cache/uv \
     . /etc/environment && \
@@ -544,8 +610,9 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 # Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage
 RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
     --mount=type=cache,target=/root/.cache/uv \
+    . /etc/environment && \
     uv pip install --system ep_kernels/dist/*.whl --verbose \
-    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+    --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
 
 # CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
 # return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers
@@ -569,7 +636,6 @@ ARG PYTHON_VERSION
 
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
-ARG PYTORCH_CUDA_INDEX_BASE_URL
 
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
@@ -587,8 +653,9 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
 RUN --mount=type=cache,target=/root/.cache/uv \
     CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
     if [ "$CUDA_MAJOR" -ge 12 ]; then \
+        . /etc/environment && \
         uv pip install --system -r requirements/dev.txt \
-        --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
+        --extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}; \
     fi
 
 # install development dependencies (for testing)
diff --git a/use_existing_torch.py b/use_existing_torch.py
index e2d3f2ec81956..93274de8ce99a 100644
--- a/use_existing_torch.py
+++ b/use_existing_torch.py
@@ -2,17 +2,106 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Drop version pins on torch-family packages from requirement files.
+
+Run this before installing dependencies on top of a separately chosen
+PyTorch build (for example a nightly) so the pinned release is not pulled
+back in.
+
+Usage:
+    python3 use_existing_torch.py [FILE ...]
+
+With no arguments, every ``requirements/*.txt`` file and ``pyproject.toml``
+(when present) under the current directory is processed.
+"""
 
 import glob
+import os
+import re
+import sys
 
-for file in (*glob.glob("requirements/*.txt"), "pyproject.toml"):
-    print(f">>> cleaning {file}")
-    with open(file) as f:
-        lines = f.readlines()
-    if "torch" in "".join(lines).lower():
-        print("removed:")
-        with open(file, "w") as f:
-            for line in lines:
-                if "torch" not in line.lower():
-                    f.write(line)
-                else:
-                    print(line.strip())
-    print(f"<<< done cleaning {file}\n")
+# Exact distribution names to unpin. Matching is on the full name, so
+# packages that merely share a prefix (e.g. "tritonclient") are untouched.
+TORCH_PACKAGES = ("torch", "torchaudio", "torchvision", "triton")
+
+# Start of a requirement entry: optional indent, optional opening quote
+# (pyproject.toml entries are quoted strings), name, optional extras.
+_NAME_RE = re.compile(
+    r"\s*[\"']?(?P<name>[A-Za-z0-9][A-Za-z0-9._-]*)(?:\s*\[[^\]]*\])?"
+)
+
+# Comma-separated version clauses such as "==2.8.0" or ">=2.0, <3.0".
+# Environment markers, closing quotes and comments are never consumed.
+_CLAUSE = r"(?:===|==|~=|!=|<=|>=|<|>)\s*[^\s,;\"'#]+"
+_SPECIFIER_RE = re.compile(rf"\s*{_CLAUSE}(?:\s*,\s*{_CLAUSE})*")
+
+
+def unpin_torch_dependency(line):
+    """Return ``line`` with the version pin of a torch-family package removed.
+
+    Args:
+        line: One line of a requirements file or of ``pyproject.toml``.
+
+    Returns:
+        ``line`` without its version specifier when it names a package in
+        ``TORCH_PACKAGES``; otherwise ``line`` unchanged. Indentation, extras,
+        environment markers, TOML quoting, trailing comments and the line
+        ending are preserved.
+    """
+    # Only the text before a comment can hold a requirement.
+    code, hash_sep, comment = line.partition("#")
+    name = _NAME_RE.match(code)
+    if name is None or name.group("name").lower() not in TORCH_PACKAGES:
+        return line
+    specifier = _SPECIFIER_RE.match(code, name.end())
+    if specifier is None:
+        return line
+    return code[: name.end()] + code[specifier.end() :] + hash_sep + comment
+
+
+def _default_files():
+    """Return the files to process when none are named on the command line."""
+    files = sorted(glob.glob("requirements/*.txt"))
+    if os.path.exists("pyproject.toml"):
+        files.append("pyproject.toml")
+    return files
+
+
+def main(argv=None):
+    """Unpin torch-family packages in every target file.
+
+    Args:
+        argv: File paths to process; defaults to ``sys.argv[1:]``. When empty,
+            the files from ``_default_files()`` are used.
+
+    Returns:
+        Process exit code: 0 on success, 1 if any file could not be processed.
+    """
+    targets = (sys.argv[1:] if argv is None else argv) or _default_files()
+    failed = False
+    for file in targets:
+        print(f">>> cleaning {file}")
+        try:
+            with open(file) as f:
+                lines = f.readlines()
+            # Build the full result before reopening the file for writing so a
+            # failure cannot leave it truncated.
+            new_lines = [unpin_torch_dependency(line) for line in lines]
+            if new_lines == lines:
+                print("  (no pinned torch dependencies found)")
+            else:
+                print("unpinning torch dependencies:")
+                for old, new in zip(lines, new_lines):
+                    if old != new:
+                        print(f"  unpinned: {old.strip()} -> {new.strip()}")
+                with open(file, "w") as f:
+                    f.writelines(new_lines)
+        except OSError as e:
+            # Report and keep going, but fail the run: a skipped file would
+            # silently keep its torch pins.
+            print(f"!!! error processing {file}: {e}")
+            failed = True
+        print(f"<<< done cleaning {file}\n")
+    return 1 if failed else 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())