mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-17 06:07:03 +08:00
Adapt Dockerfile to build nightly version
This commit is contained in:
parent
f790068600
commit
0b9c701495
@ -196,3 +196,69 @@ steps:
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
DOCKERHUB_USERNAME: "vllmbot"
|
||||
|
||||
# Build nightly torch Docker images (x86)
|
||||
- label: "Build nightly torch image (x86)"
|
||||
depends_on: ~
|
||||
id: build-nightly-torch-image-x86
|
||||
if: build.env("NIGHTLY") == "1"
|
||||
agents:
|
||||
queue: cpu_queue_postmerge
|
||||
commands:
|
||||
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
|
||||
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 --target vllm-openai --progress plain -f docker/Dockerfile ."
|
||||
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64"
|
||||
|
||||
# Build nightly torch Docker images (arm64)
|
||||
- label: "Build nightly torch image (arm64)"
|
||||
depends_on: ~
|
||||
id: build-nightly-torch-image-arm64
|
||||
if: build.env("NIGHTLY") == "1"
|
||||
agents:
|
||||
queue: arm64_cpu_queue_postmerge
|
||||
commands:
|
||||
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
|
||||
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg USE_TORCH_NIGHTLY=true --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --target vllm-openai --progress plain -f docker/Dockerfile ."
|
||||
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64"
|
||||
|
||||
# Create multi-arch manifest for nightly torch images
|
||||
- label: "Create nightly torch multi-arch manifest"
|
||||
depends_on:
|
||||
- build-nightly-torch-image-x86
|
||||
- build-nightly-torch-image-arm64
|
||||
id: create-nightly-torch-multi-arch-manifest
|
||||
if: build.env("NIGHTLY") == "1"
|
||||
agents:
|
||||
queue: cpu_queue_postmerge
|
||||
commands:
|
||||
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
|
||||
- "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 --amend"
|
||||
- "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly"
|
||||
|
||||
# Publish nightly torch images to DockerHub
|
||||
- label: "Publish nightly torch images to DockerHub"
|
||||
depends_on:
|
||||
- create-nightly-torch-multi-arch-manifest
|
||||
if: build.env("NIGHTLY") == "1"
|
||||
agents:
|
||||
queue: cpu_queue_postmerge
|
||||
commands:
|
||||
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
|
||||
- "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64"
|
||||
- "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64"
|
||||
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-x86_64"
|
||||
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-torch-nightly-aarch64 vllm/vllm-openai:torch-nightly-aarch64"
|
||||
- "docker push vllm/vllm-openai:torch-nightly-x86_64"
|
||||
- "docker push vllm/vllm-openai:torch-nightly-aarch64"
|
||||
- "docker manifest create vllm/vllm-openai:torch-nightly vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend"
|
||||
- "docker manifest create vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT vllm/vllm-openai:torch-nightly-x86_64 vllm/vllm-openai:torch-nightly-aarch64 --amend"
|
||||
- "docker manifest push vllm/vllm-openai:torch-nightly"
|
||||
- "docker manifest push vllm/vllm-openai:torch-nightly-$BUILDKITE_COMMIT"
|
||||
plugins:
|
||||
- docker-login#v3.0.0:
|
||||
username: vllmbot
|
||||
password-env: DOCKERHUB_TOKEN
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
DOCKERHUB_USERNAME: "vllmbot"
|
||||
|
||||
|
||||
@ -57,6 +57,9 @@ ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
|
||||
# PyTorch provides its own indexes for standard and nightly builds
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
|
||||
|
||||
# Flag to install PyTorch nightly instead of stable
|
||||
ARG USE_TORCH_NIGHTLY=false
|
||||
|
||||
# PIP supports multiple authentication schemes, including keyring
|
||||
# By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to
|
||||
# disabled by default, we allow third-party to use keyring authentication for
|
||||
@ -128,6 +131,7 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
|
||||
# Install PyTorch and core CUDA dependencies
|
||||
# This is ~2GB and rarely changes
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
ARG USE_TORCH_NIGHTLY
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
@ -135,8 +139,15 @@ WORKDIR /workspace
|
||||
COPY requirements/common.txt requirements/common.txt
|
||||
COPY requirements/cuda.txt requirements/cuda.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG="--prerelease=allow"; \
|
||||
else \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG=""; \
|
||||
fi && \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
|
||||
|
||||
# CUDA arch list used by torch
|
||||
# Explicitly set the list to avoid issues with torch 2.2
|
||||
@ -152,6 +163,7 @@ ARG TARGETPLATFORM
|
||||
ARG PIP_INDEX_URL UV_INDEX_URL
|
||||
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
ARG USE_TORCH_NIGHTLY
|
||||
|
||||
# install build dependencies
|
||||
COPY requirements/build.txt requirements/build.txt
|
||||
@ -164,8 +176,15 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
ENV UV_LINK_MODE=copy
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG="--prerelease=allow"; \
|
||||
else \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG=""; \
|
||||
fi && \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
@ -285,6 +304,7 @@ ARG TARGETPLATFORM
|
||||
ARG PIP_INDEX_URL UV_INDEX_URL
|
||||
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
ARG USE_TORCH_NIGHTLY
|
||||
|
||||
# install build dependencies
|
||||
COPY requirements/build.txt requirements/build.txt
|
||||
@ -297,8 +317,15 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
ENV UV_LINK_MODE=copy
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG="--prerelease=allow"; \
|
||||
else \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG=""; \
|
||||
fi && \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
@ -347,6 +374,7 @@ FROM base AS dev
|
||||
ARG PIP_INDEX_URL UV_INDEX_URL
|
||||
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
ARG USE_TORCH_NIGHTLY
|
||||
|
||||
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
|
||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||
@ -361,8 +389,15 @@ COPY requirements/lint.txt requirements/lint.txt
|
||||
COPY requirements/test.txt requirements/test.txt
|
||||
COPY requirements/dev.txt requirements/dev.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG="--prerelease=allow"; \
|
||||
else \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG=""; \
|
||||
fi && \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
|
||||
#################### DEV IMAGE ####################
|
||||
#################### vLLM installation IMAGE ####################
|
||||
# image with vLLM installed
|
||||
@ -456,11 +491,19 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
|
||||
# Install PyTorch and core CUDA dependencies
|
||||
# This is ~2GB and rarely changes
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
ARG USE_TORCH_NIGHTLY
|
||||
COPY requirements/common.txt /tmp/common.txt
|
||||
COPY requirements/cuda.txt /tmp/requirements-cuda.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG="--prerelease=allow"; \
|
||||
else \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG=""; \
|
||||
fi && \
|
||||
uv pip install --system -r /tmp/requirements-cuda.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
|
||||
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG} && \
|
||||
rm /tmp/requirements-cuda.txt /tmp/common.txt
|
||||
|
||||
# Install FlashInfer pre-compiled kernel cache and binaries
|
||||
@ -512,12 +555,20 @@ ARG PIP_INDEX_URL UV_INDEX_URL
|
||||
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
|
||||
ARG USE_TORCH_NIGHTLY
|
||||
|
||||
# Install vllm wheel first, so that torch etc will be installed.
|
||||
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG="--prerelease=allow"; \
|
||||
else \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG=""; \
|
||||
fi && \
|
||||
uv pip install --system dist/*.whl --verbose \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
. /etc/environment && \
|
||||
@ -538,8 +589,15 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
|
||||
# Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage
|
||||
RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG="--prerelease=allow"; \
|
||||
else \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG=""; \
|
||||
fi && \
|
||||
uv pip install --system ep_kernels/dist/*.whl --verbose \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}
|
||||
|
||||
# CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
|
||||
# return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers
|
||||
@ -564,6 +622,7 @@ ARG PYTHON_VERSION
|
||||
ARG PIP_INDEX_URL UV_INDEX_URL
|
||||
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
ARG USE_TORCH_NIGHTLY
|
||||
|
||||
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
|
||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||
@ -581,8 +640,15 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
|
||||
if [ "$CUDA_MAJOR" -ge 12 ]; then \
|
||||
if [ "$USE_TORCH_NIGHTLY" = "true" ]; then \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG="--prerelease=allow"; \
|
||||
else \
|
||||
PYTORCH_INDEX="${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')"; \
|
||||
PRERELEASE_FLAG=""; \
|
||||
fi && \
|
||||
uv pip install --system -r requirements/dev.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
--extra-index-url ${PYTORCH_INDEX} ${PRERELEASE_FLAG}; \
|
||||
fi
|
||||
|
||||
# install development dependencies (for testing)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user