diff --git a/docker/Dockerfile b/docker/Dockerfile index 8d4375470adf9..a71b052bfca25 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,30 +6,106 @@ # docs/assets/contributing/dockerfile-stages-dependency.png ARG CUDA_VERSION=12.8.1 +ARG PYTHON_VERSION=3.12 + +# By parameterizing the base images, we allow third-party to use their own +# base images. One use case is hermetic builds with base images stored in +# private registries that use a different repository naming conventions. +# +# Example: +# docker build --build-arg BUILD_BASE_IMAGE=registry.acme.org/mirror/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 +ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 +ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 + +# By parameterizing the Deadsnakes repository URL, we allow third-party to use +# their own mirror. When doing so, we don't benefit from the transparent +# installation of the GPG key of the PPA, as done by add-apt-repository, so we +# also need a URL for the GPG key. +ARG DEADSNAKES_MIRROR_URL +ARG DEADSNAKES_GPGKEY_URL + +# The PyPA get-pip.py script is a self contained script+zip file, that provides +# both the installer script and the pip base85-encoded zip archive. This allows +# bootstrapping pip in environment where a dsitribution package does not exist. +# +# By parameterizing the URL for get-pip.py installation script, we allow +# third-party to use their own copy of the script stored in a private mirror. +# We set the default value to the PyPA owned get-pip.py script. +# +# Reference: https://pip.pypa.io/en/stable/installation/#get-pip-py +ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py" + +# PIP supports fetching the packages from custom indexes, allowing third-party +# to host the packages in private mirrors. The PIP_INDEX_URL and +# PIP_EXTRA_INDEX_URL are standard PIP environment variables to override the +# default indexes. By letting them empty by default, PIP will use its default +# indexes if the build process doesn't override the indexes. +# +# Uv uses different variables. We set them by default to the same values as +# PIP, but they can be overridden. +ARG PIP_INDEX_URL +ARG PIP_EXTRA_INDEX_URL +ARG UV_INDEX_URL=${PIP_INDEX_URL} +ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} + +# PyTorch provides its own indexes for standard and nightly builds +ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl +ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly + +# PIP supports multiple authentication schemes, including keyring +# By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to +# disabled by default, we allow third-party to use keyring authentication for +# their private Python indexes, while not changing the default behavior which +# is no authentication. +# +# Reference: https://pip.pypa.io/en/stable/topics/authentication/#keyring-support +ARG PIP_KEYRING_PROVIDER=disabled +ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER} + #################### BASE BUILD IMAGE #################### # prepare basic build environment -FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base -ARG CUDA_VERSION=12.8.1 -ARG PYTHON_VERSION=3.12 +FROM ${BUILD_BASE_IMAGE} AS base +ARG CUDA_VERSION +ARG PYTHON_VERSION ARG TARGETPLATFORM ENV DEBIAN_FRONTEND=noninteractive +ARG DEADSNAKES_MIRROR_URL +ARG DEADSNAKES_GPGKEY_URL +ARG GET_PIP_URL + # Install Python and other dependencies RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ && apt-get update -y \ && apt-get install -y ccache software-properties-common git curl sudo \ - && for i in 1 2 3; do \ - add-apt-repository -y ppa:deadsnakes/ppa && break || \ - { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ - done \ + && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then \ + if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then \ + mkdir -p -m 0755 /etc/apt/keyrings ; \ + curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; \ + sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; \ + echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; \ + fi ; \ + else \ + for i in 1 2 3; do \ + add-apt-repository -y ppa:deadsnakes/ppa && break || \ + { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ + done ; \ + fi \ && apt-get update -y \ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ - && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \ + && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \ && python3 --version && python3 -m pip --version + +ARG PIP_INDEX_URL UV_INDEX_URL +ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL +ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL +ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER + # Install uv for faster pip installs RUN --mount=type=cache,target=/root/.cache/uv \ python3 -m pip install uv @@ -63,15 +139,19 @@ WORKDIR /workspace # after this step RUN --mount=type=cache,target=/root/.cache/uv \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu128 "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319"; \ - uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu128 --pre pytorch_triton==3.3.0+gitab727c40; \ + uv pip install --system \ + --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ + "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319"; \ + uv pip install --system \ + --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ + --pre pytorch_triton==3.3.0+gitab727c40; \ fi COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/cuda.txt \ - --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # cuda arch list used by torch # can be useful for both `dev` and `test` @@ -88,6 +168,10 @@ ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches} FROM base AS build ARG TARGETPLATFORM +ARG PIP_INDEX_URL UV_INDEX_URL +ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL +ARG PYTORCH_CUDA_INDEX_BASE_URL + # install build dependencies COPY requirements/build.txt requirements/build.txt @@ -98,7 +182,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/build.txt \ - --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') COPY . . ARG GIT_REPO_CHECK=0 @@ -113,6 +197,8 @@ ARG nvcc_threads=8 ENV NVCC_THREADS=$nvcc_threads ARG USE_SCCACHE +ARG SCCACHE_DOWNLOAD_URL=https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz +ARG SCCACHE_ENDPOINT ARG SCCACHE_BUCKET_NAME=vllm-build-sccache ARG SCCACHE_REGION_NAME=us-west-2 ARG SCCACHE_S3_NO_CREDENTIALS=0 @@ -121,10 +207,11 @@ RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,source=.git,target=.git \ if [ "$USE_SCCACHE" = "1" ]; then \ echo "Installing sccache..." \ - && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \ + && curl -L -o sccache.tar.gz ${SCCACHE_DOWNLOAD_URL} \ && tar -xzf sccache.tar.gz \ && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \ && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \ + && if [ ! -z ${SCCACHE_ENDPOINT} ] ; then export SCCACHE_ENDPOINT=${SCCACHE_ENDPOINT} ; fi \ && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \ && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \ && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \ @@ -162,6 +249,10 @@ RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \ #################### DEV IMAGE #################### FROM base as dev +ARG PIP_INDEX_URL UV_INDEX_URL +ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL +ARG PYTORCH_CUDA_INDEX_BASE_URL + # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 @@ -176,21 +267,25 @@ COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/dev.txt \ - --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') #################### DEV IMAGE #################### #################### vLLM installation IMAGE #################### # image with vLLM installed # TODO: Restore to base image after FlashInfer AOT wheel fixed -FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS vllm-base -ARG CUDA_VERSION=12.8.1 -ARG PYTHON_VERSION=3.12 +FROM ${FINAL_BASE_IMAGE} AS vllm-base +ARG CUDA_VERSION +ARG PYTHON_VERSION WORKDIR /vllm-workspace ENV DEBIAN_FRONTEND=noninteractive ARG TARGETPLATFORM SHELL ["/bin/bash", "-c"] +ARG DEADSNAKES_MIRROR_URL +ARG DEADSNAKES_GPGKEY_URL +ARG GET_PIP_URL + RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \ echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment @@ -200,17 +295,33 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && apt-get update -y \ && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \ && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \ - && for i in 1 2 3; do \ - add-apt-repository -y ppa:deadsnakes/ppa && break || \ - { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ - done \ + && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then \ + if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then \ + mkdir -p -m 0755 /etc/apt/keyrings ; \ + curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; \ + sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; \ + echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; \ + fi ; \ + else \ + for i in 1 2 3; do \ + add-apt-repository -y ppa:deadsnakes/ppa && break || \ + { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ + done ; \ + fi \ && apt-get update -y \ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ - && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \ + && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \ && python3 --version && python3 -m pip --version + +ARG PIP_INDEX_URL UV_INDEX_URL +ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL +ARG PYTORCH_CUDA_INDEX_BASE_URL +ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL +ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER + # Install uv for faster pip installs RUN --mount=type=cache,target=/root/.cache/uv \ python3 -m pip install uv @@ -232,15 +343,19 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # after this step RUN --mount=type=cache,target=/root/.cache/uv \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu128 "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319"; \ - uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu128 --pre pytorch_triton==3.3.0+gitab727c40; \ + uv pip install --system \ + --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ + "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319" ; \ + uv pip install --system \ + --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ + --pre pytorch_triton==3.3.0+gitab727c40 ; \ fi # Install vllm wheel first, so that torch etc will be installed. RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/uv \ uv pip install --system dist/*.whl --verbose \ - --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # If we need to build FlashInfer wheel before its release: # $ # Note we remove 7.0 from the arch list compared to the list below, since FlashInfer only supports sm75+ @@ -254,15 +369,20 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist # -rw-rw-r-- 1 mgoin mgoin 205M Jun 9 18:03 flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl # $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/v0.2.6.post1/flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl +# Allow specifying a version, Git revision or local .whl file +ARG FLASHINFER_CUDA128_INDEX_URL="https://download.pytorch.org/whl/cu128/flashinfer" +ARG FLASHINFER_CUDA128_WHEEL="flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl" +ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" +ARG FLASHINFER_GIT_REF="v0.2.6.post1" RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \ # FlashInfer already has a wheel for PyTorch 2.7.0 and CUDA 12.8. This is enough for CI use if [[ "$CUDA_VERSION" == 12.8* ]]; then \ - uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl; \ + uv pip install --system ${FLASHINFER_CUDA128_INDEX_URL}/${FLASHINFER_CUDA128_WHEEL} ; \ else \ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a 12.0' && \ - git clone https://github.com/flashinfer-ai/flashinfer.git --single-branch --branch v0.2.6.post1 --recursive && \ + git clone ${FLASHINFER_GIT_REPO} --single-branch --branch ${FLASHINFER_GIT_REF} --recursive && \ # Needed to build AOT kernels (cd flashinfer && \ python3 -m flashinfer.aot && \ @@ -286,7 +406,7 @@ uv pip list COPY requirements/build.txt requirements/build.txt RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/build.txt \ - --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') #################### vLLM installation IMAGE #################### @@ -297,6 +417,11 @@ FROM vllm-base AS test ADD . /vllm-workspace/ +ARG PYTHON_VERSION + +ARG PIP_INDEX_URL UV_INDEX_URL +ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL + # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 @@ -307,7 +432,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4" # install development dependencies (for testing) -RUN --mount=type=cache,target=/root/.cache/uv \ +RUN --mount=type=cache,target=/root/.cache/uv \ CUDA_MAJOR="${CUDA_VERSION%%.*}"; \ if [ "$CUDA_MAJOR" -ge 12 ]; then \ uv pip install --system -r requirements/dev.txt; \ @@ -323,7 +448,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ENV HF_HUB_ENABLE_HF_TRANSFER 1 # Copy in the v1 package for testing (it isn't distributed yet) -COPY vllm/v1 /usr/local/lib/python3.12/dist-packages/vllm/v1 +COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 # doc requires source code # we hide them inside `test_docs/` , so that this source code @@ -340,6 +465,9 @@ RUN mv mkdocs.yaml test_docs/ FROM vllm-base AS vllm-openai-base ARG TARGETPLATFORM +ARG PIP_INDEX_URL UV_INDEX_URL +ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL + # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500