[CI] Unifying Dockerfiles for ARM and X86 Builds (#21343)

Signed-off-by: Kebe <mail@kebe7jun.com>
This commit is contained in:
Kebe 2025-07-25 22:33:56 +08:00 committed by GitHub
parent e189b50f53
commit 396ee94180
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 29 additions and 67 deletions

View File

@ -7,7 +7,7 @@ permissions:
jobs:
lint-and-deploy:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04-arm
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

View File

@ -1,62 +0,0 @@
# This vLLM Dockerfile is used to construct an image that can build and run vLLM on ARM CPU platform.
FROM ubuntu:22.04 AS cpu-test-arm
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
RUN --mount=type=cache,target=/var/cache/apt \
apt-get update -y \
&& apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
&& apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
# tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access of commonly-used objects.
RUN --mount=type=cache,target=/root/.cache/pip \
pip install py-cpuinfo # Use this to gather CPU info and optimize based on ARM Neoverse cores
# Set LD_PRELOAD for tcmalloc on ARM
ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"
RUN echo 'ulimit -c 0' >> ~/.bashrc
WORKDIR /workspace
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
pip install --upgrade pip && \
pip install -r requirements/build.txt
FROM cpu-test-arm AS build
WORKDIR /workspace/vllm
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
--mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
pip install -v -r requirements/cpu.txt
COPY . .
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
# Disabling AVX512 specific optimizations for ARM
ARG VLLM_CPU_DISABLE_AVX512="true"
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
pip install dist/*.whl && \
rm -rf dist
WORKDIR /workspace/
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

View File

@ -1,4 +1,11 @@
# This vLLM Dockerfile is used to construct image that can build and run vLLM on x86 CPU platform.
# This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
#
# Supported platforms:
# - linux/amd64 (x86_64)
# - linux/arm64 (aarch64)
#
# Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
# docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
#
# Build targets:
# vllm-openai (default): used for serving deployment
@ -53,7 +60,20 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --upgrade pip && \
uv pip install -r requirements/cpu.txt
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD"
ARG TARGETARCH
ENV TARGETARCH=${TARGETARCH}
RUN if [ "$TARGETARCH" = "arm64" ]; then \
PRELOAD_PATH="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"; \
else \
PRELOAD_PATH="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"; \
fi && \
echo "export LD_PRELOAD=$PRELOAD_PATH" >> ~/.bashrc
# Ensure that the LD_PRELOAD environment variable for export is in effect.
SHELL ["/bin/bash", "-c"]
ENV LD_PRELOAD=${LD_PRELOAD}
RUN echo 'ulimit -c 0' >> ~/.bashrc

View File

@ -33,7 +33,7 @@ Testing has been conducted on AWS Graviton3 instances for compatibility.
# --8<-- [end:pre-built-images]
# --8<-- [start:build-image-from-source]
```bash
docker build -f docker/Dockerfile.arm \
docker build -f docker/Dockerfile.cpu \
--tag vllm-cpu-env .
# Launching OpenAI server

View File

@ -10,7 +10,8 @@ setuptools>=77.0.3,<80.0.0
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
torch==2.7.0; platform_system == "Darwin"
torch==2.7.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
torch==2.7.0; platform_machine == "ppc64le"
torch==2.6.0; platform_machine == "aarch64" # for arm64 CPUs, torch 2.7.0 has a issue: https://github.com/vllm-project/vllm/issues/17960
# required for the image processor of minicpm-o-2_6, this must be updated alongside torch
torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
@ -25,3 +26,6 @@ datasets # for benchmark scripts
intel-openmp==2024.2.1; platform_machine == "x86_64"
intel_extension_for_pytorch==2.6.0; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile.
# Use this to gather CPU info and optimize based on ARM Neoverse cores
py-cpuinfo; platform_machine == "aarch64"