mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 05:24:54 +08:00
269 lines
9.8 KiB
Docker
269 lines
9.8 KiB
Docker
# This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
#
# Supported platforms:
#   - linux/amd64 (x86_64)
#   - linux/arm64 (aarch64)
#
# Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
#   docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
#
# Build targets:
#   vllm-openai (default): used for serving deployment
#   vllm-test: used for CI tests
#   vllm-dev: used for development
#
# Build arguments:
#   PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
#   VLLM_CPU_DISABLE_AVX512=false (default)|true
#   VLLM_CPU_AVX512BF16=false (default)|true
#   VLLM_CPU_AVX512VNNI=false (default)|true
#
|
|
|
|
######################### COMMON BASE IMAGE #########################
|
|
FROM ubuntu:22.04 AS base-common

# Architecture-independent foundation stage: system toolchain, uv, a Python
# virtual environment and the CPU runtime requirements. The per-architecture
# stages below derive from it.
WORKDIR /workspace/

# Python version handed to `uv venv`, and the extra package index used for
# dependency resolution (defaults to the PyTorch CPU wheel index).
ARG PYTHON_VERSION=3.12
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"

# Install minimal dependencies and uv.
# The uv installer is downloaded to a file and then executed instead of being
# piped straight into `sh`: the default shell has no `pipefail`, so with a pipe
# a failed download would be masked by the exit status of `sh`.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \
    gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
    && curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh

# Route C++ compilation through ccache; the cache directory matches the cache
# mounts used by the build stages.
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache

# /root/.local/bin is presumably where the uv installer places its binaries
# (TODO confirm); the virtual environment's bin directory is prepended below so
# that it wins over system executables.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENV UV_HTTP_TIMEOUT=500

# Install Python dependencies
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"
# The requirement files are bind-mounted for this step only, so they do not
# become part of the image layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
    --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
    uv pip install --upgrade pip && \
    uv pip install -r requirements/cpu.txt

# Persist the target architecture as an environment variable in the image.
ARG TARGETARCH
ENV TARGETARCH=${TARGETARCH}
|
|
|
|
######################### x86_64 BASE IMAGE #########################
|
|
FROM base-common AS base-amd64

# x86_64 variant: preload tcmalloc from the x86_64 multiarch library directory
# together with libiomp5 from the virtual environment.
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so
|
|
|
|
######################### arm64 BASE IMAGE #########################
|
|
FROM base-common AS base-arm64

# aarch64 variant: preload tcmalloc from the aarch64 multiarch library directory.
ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4
|
|
|
|
######################### BASE IMAGE #########################
|
|
FROM base-${TARGETARCH} AS base

# Select the architecture-specific stage by name, then append a
# `ulimit -c 0` line (core-dump size limit of zero) to root's .bashrc.
RUN printf '%s\n' 'ulimit -c 0' >> ~/.bashrc
|
|
|
|
######################### BUILD IMAGE #########################
|
|
FROM base AS vllm-build

# Builds the vLLM CPU wheel with `setup.py bdist_wheel`; later stages mount
# /workspace/vllm/dist from this stage.

# Set to a value other than 0 to run tools/check_repo.sh before building.
ARG GIT_REPO_CHECK=0

# ISA switches, each overridable with --build-arg, e.g.:
#   docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...   (non-AVX512 build)
#   docker build --build-arg VLLM_CPU_AVX512BF16="true" ...       (AVX512BF16 ISA)
#   docker build --build-arg VLLM_CPU_AVX512VNNI="true" ...       (AVX512VNNI ISA)
ARG VLLM_CPU_DISABLE_AVX512=0
ARG VLLM_CPU_AVX512BF16=0
ARG VLLM_CPU_AVX512VNNI=0
# Export the switches into the environment of the following build steps.
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} \
    VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16} \
    VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI}

WORKDIR /workspace/vllm

# The CPU build requirements are bind-mounted under the name
# requirements/build.txt for this step only.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=requirements/cpu-build.txt,target=requirements/build.txt \
    uv pip install -r requirements/build.txt

COPY . .
# Optional repository check; skipped when GIT_REPO_CHECK is 0.
RUN --mount=type=bind,source=.git,target=.git \
    [ "$GIT_REPO_CHECK" = 0 ] || bash tools/check_repo.sh

# Build the wheel with the uv, ccache and .deps caches mounted and .git
# bind-mounted for the duration of the build.
RUN --mount=type=bind,source=.git,target=.git \
    --mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/uv \
    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
|
|
|
|
#################### WHEEL BUILD IMAGE ####################
|
|
FROM base AS build

# Wheel-build stage with optional sccache support. Produces the wheel in
# ./dist and optionally checks its size.
ARG TARGETPLATFORM

ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL

# install build dependencies
COPY requirements/build.txt requirements/build.txt

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt

COPY . .
# Set to a value other than 0 to run tools/check_repo.sh before building.
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
    if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}

# sccache configuration; the sccache path is taken only when USE_SCCACHE=1.
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL=https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0

# Flag to control whether to use pre-built vLLM wheels
ARG VLLM_USE_PRECOMPILED=""

# if USE_SCCACHE is set, use sccache to speed up compilation
# Notes on the install steps:
#   - `curl -f` makes an HTTP error fail the step instead of saving an error
#     page as the archive.
#   - The archive is unpacked with --strip-components=1 into a scratch
#     directory, so the step does not depend on the version/architecture
#     specific top-level directory name and keeps working when
#     SCCACHE_DOWNLOAD_URL is overridden (assumes the archive keeps the
#     `sccache` binary directly under a single top-level directory).
#   - `mv` is invoked directly: `sudo` is not among the packages installed in
#     the base stage, and no USER instruction in the visible stages switches
#     away from the base image's default user.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz "${SCCACHE_DOWNLOAD_URL}" \
        && mkdir -p /tmp/sccache-extract \
        && tar -xzf sccache.tar.gz -C /tmp/sccache-extract --strip-components=1 \
        && mv /tmp/sccache-extract/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz /tmp/sccache-extract \
        && if [ -n "${SCCACHE_ENDPOINT}" ] ; then export SCCACHE_ENDPOINT="${SCCACHE_ENDPOINT}" ; fi \
        && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
        && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
        && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
        && export SCCACHE_IDLE_TIMEOUT=0 \
        && export CMAKE_BUILD_TYPE=Release \
        && export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
        && export VLLM_DOCKER_BUILD_CONTEXT=1 \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

ARG vllm_target_device="cpu"
ENV VLLM_TARGET_DEVICE=${vllm_target_device}
ENV CCACHE_DIR=/root/.cache/ccache
# Build path used when sccache is not enabled (ccache cache mount instead).
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" != "1" ]; then \
        # Clean any existing CMake artifacts
        rm -rf .deps && \
        mkdir -p .deps && \
        export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
        export VLLM_DOCKER_BUILD_CONTEXT=1 && \
        python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
    fi

# Check the size of the wheel if RUN_WHEEL_CHECK is true
COPY .buildkite/check-wheel-size.py check-wheel-size.py
# sync the default value with .buildkite/check-wheel-size.py
ARG VLLM_MAX_SIZE_MB=450
ENV VLLM_MAX_SIZE_MB=$VLLM_MAX_SIZE_MB
ARG RUN_WHEEL_CHECK=true
RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \
        python3 check-wheel-size.py dist; \
    else \
        echo "Skipping wheel size check."; \
    fi
|
|
|
|
######################### TEST DEPS #########################
|
|
FROM base AS vllm-test-deps

# Resolves and installs the test requirements for CPU.
WORKDIR /workspace/vllm

# Derive the CPU test input list from requirements/test.in by removing every
# line that mentions mamba_ssm, then compile it against the CPU torch backend.
RUN --mount=type=bind,source=requirements/test.in,target=requirements/test.in \
    cp requirements/test.in requirements/cpu-test.in \
    && sed -i '/mamba_ssm/d' requirements/cpu-test.in \
    && uv pip compile requirements/cpu-test.in \
        --index-strategy unsafe-best-match \
        --torch-backend cpu \
        -o requirements/cpu-test.txt

# Install the compiled requirement set into the virtual environment.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/cpu-test.txt
|
|
|
|
######################### DEV IMAGE #########################
|
|
FROM vllm-build AS vllm-dev

# Development image: extends the build stage with editor/NUMA tooling, the
# test utilities, a development install of vLLM and the dev requirements.
WORKDIR /workspace/vllm

# Refresh the package lists in the same step as the install so it does not
# depend on lists left behind in the /var/lib/apt cache mount by another stage
# (the cache mount can be empty or stale on a different builder).
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends vim numactl xz-utils

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils

# Development-mode install of vLLM for the CPU target.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=bind,source=.git,target=.git \
    VLLM_TARGET_DEVICE=cpu python3 setup.py develop

# Replace requirements/test.txt with the CPU-compiled test requirements from
# the vllm-test-deps stage (presumably requirements/dev.txt pulls it in;
# verify against that file).
COPY --from=vllm-test-deps /workspace/vllm/requirements/cpu-test.txt requirements/test.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/dev.txt && \
    pre-commit install --hook-type pre-commit --hook-type commit-msg

ENTRYPOINT ["bash"]
|
|
|
|
######################### TEST IMAGE #########################
|
|
FROM vllm-test-deps AS vllm-test

# CI test image: installs the wheel built in vllm-build on top of the test
# dependencies and adds the test, example and benchmark sources.
WORKDIR /workspace/

# The wheel directory is bind-mounted from the build stage, so the wheel file
# itself is not stored in this image.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
    uv pip install dist/*.whl

# COPY is used instead of ADD: these are plain local files and directories, so
# none of ADD's extra behaviors (archive extraction, URL fetch) are wanted.
COPY ./tests/ ./tests/
COPY ./examples/ ./examples/
COPY ./benchmarks/ ./benchmarks/
COPY ./vllm/collect_env.py .
COPY ./.buildkite/ ./.buildkite/

# Create symlink for vllm-workspace to maintain CI compatibility
RUN ln -sf /workspace /vllm-workspace

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils
|
|
|
|
######################### RELEASE IMAGE #########################
|
|
FROM base AS vllm-openai

# Serving image: installs the wheel produced by the vllm-build stage and
# starts `vllm serve` by default.
WORKDIR /workspace/

# dist/ is bind-mounted from the build stage for this step only.
RUN --mount=type=bind,from=vllm-build,source=/workspace/vllm/dist,target=dist \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/uv \
    uv pip install dist/*.whl

# Arguments given to `docker run` are appended after `vllm serve`.
ENTRYPOINT ["vllm", "serve"]
|