From 435be10db9f4f2b1e0f3a13c56a105310815b830 Mon Sep 17 00:00:00 2001 From: ioana ghiban Date: Fri, 24 Oct 2025 14:11:01 +0200 Subject: [PATCH] Fix AArch64 CPU Docker pipeline (#27331) Signed-off-by: Ioana Ghiban --- .buildkite/release-pipeline.yaml | 2 +- docker/Dockerfile.cpu | 93 ++------------------------------ 2 files changed, 5 insertions(+), 90 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index afb83c249087..33b7114666fa 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -22,7 +22,7 @@ steps: agents: queue: arm64_cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/scripts/upload-wheels.sh" diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 5798e589edaf..f3fd1ee3e32b 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -79,6 +79,9 @@ RUN echo 'ulimit -c 0' >> ~/.bashrc ######################### BUILD IMAGE ######################### FROM base AS vllm-build +ARG max_jobs=2 +ENV MAX_JOBS=${max_jobs} + ARG GIT_REPO_CHECK=0 # Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ... ARG VLLM_CPU_DISABLE_AVX512=0 @@ -104,95 +107,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/ccache \ --mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \ --mount=type=bind,source=.git,target=.git \ - VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel - -#################### WHEEL BUILD IMAGE #################### -FROM base AS build -ARG TARGETPLATFORM - -ARG PIP_INDEX_URL UV_INDEX_URL -ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL - -# install build dependencies -COPY requirements/build.txt requirements/build.txt - -# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out -# Reference: https://github.com/astral-sh/uv/pull/1694 -ENV UV_HTTP_TIMEOUT=500 -ENV UV_INDEX_STRATEGY="unsafe-best-match" -# Use copy mode to avoid hardlink failures with Docker cache mounts -ENV UV_LINK_MODE=copy - -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt - -COPY . . -ARG GIT_REPO_CHECK=0 -RUN --mount=type=bind,source=.git,target=.git \ - if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi - -# max jobs used by Ninja to build extensions -ARG max_jobs=2 -ENV MAX_JOBS=${max_jobs} - -ARG USE_SCCACHE -ARG SCCACHE_DOWNLOAD_URL=https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz -ARG SCCACHE_ENDPOINT -ARG SCCACHE_BUCKET_NAME=vllm-build-sccache -ARG SCCACHE_REGION_NAME=us-west-2 -ARG SCCACHE_S3_NO_CREDENTIALS=0 - -# Flag to control whether to use pre-built vLLM wheels -ARG VLLM_USE_PRECOMPILED="" - -# if USE_SCCACHE is set, use sccache to speed up compilation -RUN --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,source=.git,target=.git \ - if [ "$USE_SCCACHE" = "1" ]; then \ - echo "Installing sccache..." \ - && curl -L -o sccache.tar.gz ${SCCACHE_DOWNLOAD_URL} \ - && tar -xzf sccache.tar.gz \ - && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \ - && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \ - && if [ ! -z ${SCCACHE_ENDPOINT} ] ; then export SCCACHE_ENDPOINT=${SCCACHE_ENDPOINT} ; fi \ - && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \ - && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \ - && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \ - && export SCCACHE_IDLE_TIMEOUT=0 \ - && export CMAKE_BUILD_TYPE=Release \ - && export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \ - && export VLLM_DOCKER_BUILD_CONTEXT=1 \ - && sccache --show-stats \ - && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \ - && sccache --show-stats; \ - fi - -ARG vllm_target_device="cpu" -ENV VLLM_TARGET_DEVICE=${vllm_target_device} -ENV CCACHE_DIR=/root/.cache/ccache -RUN --mount=type=cache,target=/root/.cache/ccache \ - --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,source=.git,target=.git \ - if [ "$USE_SCCACHE" != "1" ]; then \ - # Clean any existing CMake artifacts - rm -rf .deps && \ - mkdir -p .deps && \ - export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \ - export VLLM_DOCKER_BUILD_CONTEXT=1 && \ - python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \ - fi - -# Check the size of the wheel if RUN_WHEEL_CHECK is true -COPY .buildkite/check-wheel-size.py check-wheel-size.py -# sync the default value with .buildkite/check-wheel-size.py -ARG VLLM_MAX_SIZE_MB=450 -ENV VLLM_MAX_SIZE_MB=$VLLM_MAX_SIZE_MB -ARG RUN_WHEEL_CHECK=true -RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \ - python3 check-wheel-size.py dist; \ - else \ - echo "Skipping wheel size check."; \ - fi + VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 ######################### TEST DEPS ######################### FROM base AS vllm-test-deps