# This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
#
# Supported platforms:
#   - linux/amd64 (x86_64)
#   - linux/arm64 (aarch64)
#
# Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
#   docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
#
# Build targets:
#   vllm-openai (default): used for serving deployment
#   vllm-test: used for CI tests
#   vllm-dev: used for development
#   build: builds a wheel with `setup.py bdist_wheel` (optionally through sccache)
#
# Build arguments:
#   PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
#   VLLM_CPU_DISABLE_AVX512=0 (default)|true
#   VLLM_CPU_AVX512BF16=0 (default)|true
#   VLLM_CPU_AVX512VNNI=0 (default)|true
#

######################### COMMON BASE IMAGE #########################
FROM ubuntu:22.04 AS base-common

WORKDIR /workspace/

ARG PYTHON_VERSION=3.12
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"

# Install minimal dependencies and uv.
# The uv installer is downloaded to a file before being executed: the default
# /bin/sh has no `pipefail`, so `curl ... | sh` would hide a failed download.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \
        gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
    && curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh

ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache

# uv is installed under /root/.local/bin; the virtual environment lives in /opt/venv.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENV UV_HTTP_TIMEOUT=500

# Install Python dependencies
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
    --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
    uv pip install --upgrade pip && \
    uv pip install -r requirements/cpu.txt

# Expose the BuildKit-provided target architecture to later stages and at runtime.
ARG TARGETARCH
ENV TARGETARCH=${TARGETARCH}

######################### x86_64 BASE IMAGE #########################
FROM base-common AS base-amd64

ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"

######################### arm64 BASE IMAGE #########################
FROM base-common AS base-arm64

ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"

######################### BASE IMAGE #########################
# Selects base-amd64 or base-arm64 according to the target platform.
FROM base-${TARGETARCH} AS base

# Disable core dumps in interactive bash sessions.
RUN echo 'ulimit -c 0' >> ~/.bashrc

######################### BUILD IMAGE #########################
FROM base AS vllm-build

ARG GIT_REPO_CHECK=0
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
ARG VLLM_CPU_DISABLE_AVX512=0
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
# Support for building with AVX512BF16 ISA: docker build --build-arg VLLM_CPU_AVX512BF16="true" ...
ARG VLLM_CPU_AVX512BF16=0
ENV VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16}
# Support for building with AVX512VNNI ISA: docker build --build-arg VLLM_CPU_AVX512VNNI="true" ...
ARG VLLM_CPU_AVX512VNNI=0
ENV VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI}

WORKDIR /workspace/vllm

# Build requirements are installed before the source is copied so this layer
# stays cached when only the source tree changes.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=requirements/cpu-build.txt,target=requirements/build.txt \
    uv pip install -r requirements/build.txt

COPY . .
RUN --mount=type=bind,source=.git,target=.git \
    if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi

RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
    --mount=type=bind,source=.git,target=.git \
    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel

#################### WHEEL BUILD IMAGE ####################
FROM base AS build
ARG TARGETPLATFORM

ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL

# install build dependencies
COPY requirements/build.txt requirements/build.txt

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt

COPY . .
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
    if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}

ARG USE_SCCACHE
# NOTE: the default URL is an x86_64 binary, and the extracted directory name
# used below is tied to this exact release; override both together.
ARG SCCACHE_DOWNLOAD_URL=https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0

# Flag to control whether to use pre-built vLLM wheels
ARG VLLM_USE_PRECOMPILED=""

# Set the target device before either wheel-build step so that the sccache
# path and the ccache path below build with the same VLLM_TARGET_DEVICE.
ARG vllm_target_device="cpu"
ENV VLLM_TARGET_DEVICE=${vllm_target_device}
ENV CCACHE_DIR=/root/.cache/ccache

# if USE_SCCACHE is set, use sccache to speed up compilation
# (the build runs as root, so no sudo is needed to install into /usr/bin)
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz ${SCCACHE_DOWNLOAD_URL} \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && if [ -n "${SCCACHE_ENDPOINT}" ] ; then export SCCACHE_ENDPOINT=${SCCACHE_ENDPOINT} ; fi \
        && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
        && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
        && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
        && export SCCACHE_IDLE_TIMEOUT=0 \
        && export CMAKE_BUILD_TYPE=Release \
        && export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
        && export VLLM_DOCKER_BUILD_CONTEXT=1 \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

# Otherwise build the wheel with the ccache cache mount.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" != "1" ]; then \
        # Clean any existing CMake artifacts
        rm -rf .deps && \
        mkdir -p .deps && \
        export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
        export VLLM_DOCKER_BUILD_CONTEXT=1 && \
        python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
    fi

# Check the size of the wheel if RUN_WHEEL_CHECK is true
COPY .buildkite/check-wheel-size.py check-wheel-size.py
# sync the default value with .buildkite/check-wheel-size.py
ARG VLLM_MAX_SIZE_MB=450
ENV VLLM_MAX_SIZE_MB=$VLLM_MAX_SIZE_MB
ARG RUN_WHEEL_CHECK=true
RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \
        python3 check-wheel-size.py dist; \
    else \
        echo "Skipping wheel size check."; \
    fi

######################### TEST DEPS #########################
FROM base AS vllm-test-deps

WORKDIR /workspace/vllm

# Derive the CPU test requirements from requirements/test.in: drop the
# mamba_ssm entry, then compile against the CPU torch backend.
RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
    cp requirements/test.in requirements/cpu-test.in && \
    sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
    uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/cpu-test.txt

######################### DEV IMAGE #########################
FROM vllm-build AS vllm-dev

WORKDIR /workspace/vllm

# Refresh the package lists in the same step as the install so it does not
# depend on whatever an earlier stage left in the /var/lib/apt cache mount.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends vim numactl xz-utils

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils

RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=bind,source=.git,target=.git \
    VLLM_TARGET_DEVICE=cpu python3 setup.py develop

# Replace requirements/test.txt with the CPU-specific compiled requirements.
COPY --from=vllm-test-deps /workspace/vllm/requirements/cpu-test.txt requirements/test.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/dev.txt && \
    pre-commit install --hook-type pre-commit --hook-type commit-msg

ENTRYPOINT ["bash"]

######################### TEST IMAGE #########################
FROM vllm-test-deps AS vllm-test

WORKDIR /workspace/

# Install the wheel produced by the vllm-build stage.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
    uv pip install dist/*.whl

# Plain local files and directories: COPY is sufficient (no archive extraction
# or remote fetch is needed).
COPY ./tests/ ./tests/
COPY ./examples/ ./examples/
COPY ./benchmarks/ ./benchmarks/
COPY ./vllm/collect_env.py .
COPY ./.buildkite/ ./.buildkite/

# Create symlink for vllm-workspace to maintain CI compatibility
RUN ln -sf /workspace /vllm-workspace

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils

######################### RELEASE IMAGE #########################
FROM base AS vllm-openai

WORKDIR /workspace/

# Install the wheel produced by the vllm-build stage.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
    uv pip install dist/*.whl

ENTRYPOINT ["vllm", "serve"]