[CI] Unifying Dockerfiles for ARM and X86 Builds (#21343)
Signed-off-by: Kebe <mail@kebe7jun.com>
commit 396ee94180 (parent e189b50f53)
.github/workflows/lint-and-deploy.yaml (vendored): 2 lines changed
@@ -7,7 +7,7 @@ permissions:
 
 jobs:
   lint-and-deploy:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04-arm
     steps:
       - name: Checkout
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
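This moves the lint-and-deploy job from an x86_64 runner to GitHub's arm64 runner image. If you want to confirm the architecture a job actually runs on, a throwaway step like the following works (illustrative, not part of this commit):

```bash
# Illustrative check, not part of this commit: ubuntu-24.04-arm runners
# should report aarch64 here; ubuntu-latest reports x86_64.
uname -m
```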
docker/Dockerfile.arm (deleted)
@@ -1,62 +0,0 @@
-# This vLLM Dockerfile is used to construct an image that can build and run vLLM on ARM CPU platform.
-
-FROM ubuntu:22.04 AS cpu-test-arm
-
-ENV CCACHE_DIR=/root/.cache/ccache
-
-ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
-
-RUN --mount=type=cache,target=/var/cache/apt \
-    apt-get update -y \
-    && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
-    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
-    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
-
-# tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access of commonly-used objects.
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install py-cpuinfo  # Use this to gather CPU info and optimize based on ARM Neoverse cores
-
-# Set LD_PRELOAD for tcmalloc on ARM
-ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"
-
-RUN echo 'ulimit -c 0' >> ~/.bashrc
-
-WORKDIR /workspace
-
-ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
-ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
-    pip install --upgrade pip && \
-    pip install -r requirements/build.txt
-
-FROM cpu-test-arm AS build
-
-WORKDIR /workspace/vllm
-
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
-    --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
-    pip install -v -r requirements/cpu.txt
-
-COPY . .
-ARG GIT_REPO_CHECK=0
-RUN --mount=type=bind,source=.git,target=.git \
-    if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
-
-# Disabling AVX512 specific optimizations for ARM
-ARG VLLM_CPU_DISABLE_AVX512="true"
-ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
-
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/ccache \
-    --mount=type=bind,source=.git,target=.git \
-    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
-    pip install dist/*.whl && \
-    rm -rf dist
-
-WORKDIR /workspace/
-
-RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
-
-ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
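With the standalone ARM Dockerfile gone, arm64 images are built from the unified docker/Dockerfile.cpu below by selecting the platform at build time, as the new header comment describes. A minimal sketch, assuming a buildx builder that can target linux/arm64 (for example via QEMU binfmt emulation on an x86_64 host); the tag name is illustrative:

```bash
# Minimal sketch: build the arm64 CPU image from the unified Dockerfile.
# Assumes a buildx builder with arm64 support; the tag is illustrative.
docker buildx build --platform=linux/arm64 \
    -f docker/Dockerfile.cpu \
    --tag vllm-cpu-env .
```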
docker/Dockerfile.cpu
@@ -1,4 +1,11 @@
-# This vLLM Dockerfile is used to construct image that can build and run vLLM on x86 CPU platform.
+# This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
+#
+# Supported platforms:
+#   - linux/amd64 (x86_64)
+#   - linux/arm64 (aarch64)
+#
+# Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
+#   docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
 #
 # Build targets:
 #   vllm-openai (default): used for serving deployment
@@ -53,7 +60,20 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --upgrade pip && \
     uv pip install -r requirements/cpu.txt
 
-ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD"
+ARG TARGETARCH
+ENV TARGETARCH=${TARGETARCH}
+
+RUN if [ "$TARGETARCH" = "arm64" ]; then \
+        PRELOAD_PATH="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"; \
+    else \
+        PRELOAD_PATH="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"; \
+    fi && \
+    echo "export LD_PRELOAD=$PRELOAD_PATH" >> ~/.bashrc
+
+# Ensure the exported LD_PRELOAD setting takes effect.
+SHELL ["/bin/bash", "-c"]
+
+ENV LD_PRELOAD=${LD_PRELOAD}
 
 RUN echo 'ulimit -c 0' >> ~/.bashrc
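BuildKit populates the TARGETARCH build argument from the `--platform` flag (`amd64` or `arm64`), so the RUN step above writes the matching tcmalloc preload path, plus libiomp5 on x86_64, into ~/.bashrc. A minimal local sketch of the same branch logic; outside a build TARGETARCH is unset, so it is defaulted here:

```bash
# Sketch of the preload-path selection above. TARGETARCH is normally injected
# by BuildKit inside the image; default it to amd64 for local experimentation.
TARGETARCH="${TARGETARCH:-amd64}"
if [ "$TARGETARCH" = "arm64" ]; then
    PRELOAD_PATH="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"
else
    PRELOAD_PATH="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"
fi
echo "LD_PRELOAD would be: $PRELOAD_PATH"
```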
docs: ARM CPU installation guide (build-image-from-source snippet)
@@ -33,7 +33,7 @@ Testing has been conducted on AWS Graviton3 instances for compatibility.
 # --8<-- [end:pre-built-images]
 # --8<-- [start:build-image-from-source]
 ```bash
-docker build -f docker/Dockerfile.arm \
+docker build -f docker/Dockerfile.cpu \
         --tag vllm-cpu-env .
 
 # Launching OpenAI server
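Because the image's entrypoint is `python3 -m vllm.entrypoints.openai.api_server` (see the deleted Dockerfile.arm above; the unified image serves the same way), running the freshly built tag starts the OpenAI-compatible server directly. A hedged example; the model name and port mapping are placeholders, not from this commit:

```bash
# Illustrative run command: arguments after the image name are forwarded to
# the API server entrypoint. Model and port are placeholders.
docker run --rm -p 8000:8000 vllm-cpu-env --model Qwen/Qwen2.5-0.5B-Instruct
```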
requirements/cpu.txt
@@ -10,7 +10,8 @@ setuptools>=77.0.3,<80.0.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
 torch==2.7.0; platform_system == "Darwin"
-torch==2.7.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
+torch==2.7.0; platform_machine == "ppc64le"
+torch==2.6.0; platform_machine == "aarch64" # for arm64 CPUs, torch 2.7.0 has an issue: https://github.com/vllm-project/vllm/issues/17960
 
 # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
 torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
@@ -25,3 +26,6 @@ datasets # for benchmark scripts
 intel-openmp==2024.2.1; platform_machine == "x86_64"
 intel_extension_for_pytorch==2.6.0; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
 triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile.
+
+# Use this to gather CPU info and optimize based on ARM Neoverse cores
+py-cpuinfo; platform_machine == "aarch64"
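The per-architecture pins rely on PEP 508 environment markers: pip evaluates `platform_machine` on the installing machine, so aarch64 hosts get torch 2.6.0 and py-cpuinfo while x86_64 hosts keep the Intel-specific extras. To see which branch applies on a given machine:

```bash
# pip compares platform_machine markers against this value when resolving
# requirements/cpu.txt; expect x86_64, aarch64, ppc64le, or s390x.
python3 -c 'import platform; print(platform.machine())'
```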