mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-03 10:57:55 +08:00
AArch64 CPU Docker pipeline (#26931)
This commit is contained in:
parent
9fce7bee74
commit
1c691f4a71
@ -15,6 +15,20 @@ steps:
|
|||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
|
# aarch64 build.
# Builds the CPU wheel on the arm64 post-merge queue and uploads it.
- label: "Build arm64 CPU wheel"
  id: build-wheel-arm64-cpu
  depends_on: ~
  agents:
    queue: arm64_cpu_queue_postmerge
  env:
    DOCKER_BUILDKIT: "1"
  commands:
    # Build the `build` target of docker/Dockerfile.cpu with sccache enabled
    # and the git repo check turned on.
    - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile.cpu ."
    # Host directory that receives the wheel(s) from the image.
    - "mkdir artifacts"
    # Copy dist/ out of the image and make it readable/writable for everyone
    # on the host side.
    - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
    # Run the repository's wheel upload script.
    - "bash .buildkite/scripts/upload-wheels.sh"
|
||||||
|
|
||||||
- label: "Build wheel - CUDA 12.8"
|
- label: "Build wheel - CUDA 12.8"
|
||||||
depends_on: ~
|
depends_on: ~
|
||||||
id: build-wheel-cuda-12-8
|
id: build-wheel-cuda-12-8
|
||||||
@ -142,6 +156,22 @@ steps:
|
|||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
|
# Manual gate for the arm64 CPU release image; the publish step references
# this step through its key.
- block: "Build arm64 CPU release image"
  depends_on: ~
  key: block-arm64-cpu-release-image-build
|
||||||
|
|
||||||
|
# Builds the `vllm-openai` target of docker/Dockerfile.cpu on the arm64 queue
# and pushes it to public ECR under two tags: the `release-version`
# meta-data value and `latest`.
- label: "Build and publish arm64 CPU release image"
  depends_on: block-arm64-cpu-release-image-build
  agents:
    queue: arm64_cpu_queue_postmerge
  env:
    DOCKER_BUILDKIT: "1"
  commands:
    # Authenticate docker against the public ECR registry.
    - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
    # Build once, tagging both the versioned and the `latest` image.
    - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
    # Push both tags.
    - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest"
    - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
|
||||||
|
|
||||||
- label: "Build and publish nightly multi-arch image to DockerHub"
|
- label: "Build and publish nightly multi-arch image to DockerHub"
|
||||||
depends_on:
|
depends_on:
|
||||||
- create-multi-arch-manifest
|
- create-multi-arch-manifest
|
||||||
|
|||||||
@ -106,6 +106,94 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
--mount=type=bind,source=.git,target=.git \
|
--mount=type=bind,source=.git,target=.git \
|
||||||
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
|
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
|
||||||
|
|
||||||
|
#################### WHEEL BUILD IMAGE ####################
# Stage that produces the vLLM wheel under dist/.
FROM base AS build
ARG TARGETPLATFORM

# Optional package index overrides for pip and uv.
ARG PIP_INDEX_URL
ARG UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL
ARG UV_EXTRA_INDEX_URL

# uv settings for this stage:
#  - UV_HTTP_TIMEOUT (seconds): some dependency downloads are likely to time
#    out otherwise. Reference: https://github.com/astral-sh/uv/pull/1694
#  - UV_INDEX_STRATEGY: "unsafe-best-match".
#  - UV_LINK_MODE=copy: avoids hardlink failures with Docker cache mounts.
ENV UV_HTTP_TIMEOUT=500 \
    UV_INDEX_STRATEGY="unsafe-best-match" \
    UV_LINK_MODE=copy

# Install the build dependencies from the requirements file alone, before the
# full source tree is copied in.
COPY requirements/build.txt requirements/build.txt
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt

# Bring in the source tree; run the repo check unless GIT_REPO_CHECK is "0".
COPY . .
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
    [ "$GIT_REPO_CHECK" = "0" ] || bash tools/check_repo.sh
|
||||||
|
|
||||||
|
# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}

# sccache configuration.
#  - USE_SCCACHE=1 enables the sccache build path below.
#  - SCCACHE_VERSION selects the release that is downloaded.
#  - SCCACHE_DOWNLOAD_URL is an optional override. When it is empty the URL is
#    derived from SCCACHE_VERSION and `uname -m`, so the binary matches the
#    architecture of the build container (x86_64 or aarch64) instead of always
#    being the x86_64 build.
ARG USE_SCCACHE
ARG SCCACHE_VERSION=v0.8.1
ARG SCCACHE_DOWNLOAD_URL=""
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0

# Flag to control whether to use pre-built vLLM wheels
ARG VLLM_USE_PRECOMPILED=""

# Declared before the sccache build so this path targets the same device as
# the non-sccache build path of this stage.
ARG vllm_target_device="cpu"

# If USE_SCCACHE is set, use sccache to speed up compilation.
# Notes:
#  - curl -f makes an HTTP error fail the step instead of saving an error page.
#  - The archive is unpacked with --strip-components=1 into a scratch
#    directory, so the step does not depend on the archive's top-level
#    directory name (which embeds version and architecture).
#  - No sudo: NOTE(review) this assumes the build runs as root, which the
#    /root/.cache mounts suggest -- confirm against the base stage.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && sccache_url="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-$(uname -m)-unknown-linux-musl.tar.gz}" \
        && curl -fL -o sccache.tar.gz "${sccache_url}" \
        && mkdir -p /tmp/sccache \
        && tar -xzf sccache.tar.gz -C /tmp/sccache --strip-components=1 \
        && mv /tmp/sccache/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz /tmp/sccache \
        && if [ -n "${SCCACHE_ENDPOINT}" ] ; then export SCCACHE_ENDPOINT="${SCCACHE_ENDPOINT}" ; fi \
        && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
        && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
        && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
        && export SCCACHE_IDLE_TIMEOUT=0 \
        && export CMAKE_BUILD_TYPE=Release \
        && export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
        && export VLLM_DOCKER_BUILD_CONTEXT=1 \
        && export VLLM_TARGET_DEVICE="${vllm_target_device}" \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi
|
||||||
|
|
||||||
|
# Target device (default "cpu") and ccache directory for the wheel build.
ARG vllm_target_device="cpu"
ENV VLLM_TARGET_DEVICE=${vllm_target_device} \
    CCACHE_DIR=/root/.cache/ccache

# Build path used when USE_SCCACHE is not "1": recreate an empty .deps
# directory (cleans any existing CMake artifacts), then build the wheel
# into dist/.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" != "1" ]; then \
        rm -rf .deps \
        && mkdir -p .deps \
        && VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
           VLLM_DOCKER_BUILD_CONTEXT=1 \
           python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
    fi
|
||||||
|
|
||||||
|
# Wheel size gate: runs check-wheel-size.py against dist/ only when
# RUN_WHEEL_CHECK is exactly "true"; any other value skips the check.
COPY .buildkite/check-wheel-size.py check-wheel-size.py
# Keep this default in sync with .buildkite/check-wheel-size.py
ARG VLLM_MAX_SIZE_MB=450
ENV VLLM_MAX_SIZE_MB=$VLLM_MAX_SIZE_MB
ARG RUN_WHEEL_CHECK=true
RUN case "$RUN_WHEEL_CHECK" in \
        true) python3 check-wheel-size.py dist ;; \
        *) echo "Skipping wheel size check." ;; \
    esac
|
||||||
|
|
||||||
######################### TEST DEPS #########################
|
######################### TEST DEPS #########################
|
||||||
FROM base AS vllm-test-deps
|
FROM base AS vllm-test-deps
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user