From 3168285fcaaee09bc93dce7bc9ae6ee823c71652 Mon Sep 17 00:00:00 2001
From: Fadi Arafeh <115173828+fadara01@users.noreply.github.com>
Date: Thu, 20 Nov 2025 02:37:09 +0000
Subject: [PATCH] [cpu][ci] Add initial set of tests for Arm CPUs (#28657)

Signed-off-by: Fadi Arafeh
---
 .../scripts/hardware_ci/run-cpu-test-arm.sh | 64 +++++++++++++++++++
 docker/Dockerfile.cpu                       | 10 +++
 2 files changed, 74 insertions(+)
 create mode 100755 .buildkite/scripts/hardware_ci/run-cpu-test-arm.sh

diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh b/.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
new file mode 100755
index 000000000000..d0036f24c8d0
--- /dev/null
+++ b/.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# This script builds the CPU docker image and runs offline inference inside the container.
+# It serves as a sanity check for compilation and basic model usage.
+set -ex
+
+# Allow binding to different cores
+CORE_RANGE=${CORE_RANGE:-0-16}
+OMP_CORE_RANGE=${OMP_CORE_RANGE:-0-16}
+NUMA_NODE=${NUMA_NODE:-0}
+
+export CMAKE_BUILD_PARALLEL_LEVEL=32
+
+# Set up cleanup
+remove_docker_container() {
+  set -e;
+  docker rm -f cpu-test-"$NUMA_NODE" || true;
+}
+trap remove_docker_container EXIT
+remove_docker_container
+
+# Try building the docker image
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$NUMA_NODE" --target vllm-test -f docker/Dockerfile.cpu .
+
+# Run the image, setting --shm-size=4g for tensor parallelism.
+docker run -itd --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=16 --env VLLM_CPU_CI_ENV=1 -e E2E_OMP_THREADS="$OMP_CORE_RANGE" --shm-size=4g --name cpu-test-"$NUMA_NODE" cpu-test-"$NUMA_NODE"
+
+function cpu_tests() {
+  set -e
+  export NUMA_NODE=$2
+
+  docker exec cpu-test-"$NUMA_NODE" bash -c "
+    set -e
+    pip list"
+
+  # Offline inference
+  docker exec cpu-test-"$NUMA_NODE" bash -c "
+    set -e
+    python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
+
+  # Run kernel tests
+  docker exec cpu-test-"$NUMA_NODE" bash -c "
+    set -e
+    pytest -x -v -s tests/kernels/test_onednn.py
+    pytest -x -v -s tests/kernels/attention/test_cpu_attn.py"
+
+  # Basic online serving
+  docker exec cpu-test-"$NUMA_NODE" bash -c '
+    set -e
+    VLLM_CPU_OMP_THREADS_BIND=$E2E_OMP_THREADS vllm serve meta-llama/Llama-3.2-3B-Instruct --max-model-len 2048 &
+    server_pid=$!
+    timeout 600 bash -c "until curl localhost:8000/v1/models; do sleep 1; done" || exit 1
+    vllm bench serve \
+      --backend vllm \
+      --dataset-name random \
+      --model meta-llama/Llama-3.2-3B-Instruct \
+      --num-prompts 20 \
+      --endpoint /v1/completions
+    kill -s SIGTERM $server_pid &'
+}
+
+# All CPU tests are expected to finish in less than 40 minutes.
+export -f cpu_tests
+timeout 2h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"

diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu
index 4c961defaeda..eb3807ef0ca4
--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -37,6 +37,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
     && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
     && curl -LsSf https://astral.sh/uv/install.sh | sh
 
+ENV CC=/usr/bin/gcc-12 CXX=/usr/bin/g++-12
 ENV CCACHE_DIR=/root/.cache/ccache
 ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
 
@@ -122,6 +123,15 @@ WORKDIR /workspace/vllm
 RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
     cp requirements/test.in requirements/cpu-test.in && \
     sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
+    remove_packages_not_supported_on_aarch64() { \
+        case "$(uname -m)" in \
+            aarch64|arm64) \
+                sed -i '/decord/d' requirements/cpu-test.in; \
+                sed -i '/terratorch/d' requirements/cpu-test.in; \
+                ;; \
+        esac; \
+    }; \
+    remove_packages_not_supported_on_aarch64 && \
     sed -i 's/^torch==.*/torch==2.8.0/g' requirements/cpu-test.in && \
     sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
     sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
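
For reference, a minimal sketch of how the new script could be exercised locally on an Arm host with Docker and numactl installed, outside the Buildkite pipeline. Only variables the script already reads are used (CORE_RANGE, OMP_CORE_RANGE, NUMA_NODE, HF_TOKEN); the core range, node number, and token value below are placeholders, not values from the patch.

# Hypothetical local invocation of the new Arm CPU test script (not part of the patch).
# Run from the vLLM repository root so "docker build -f docker/Dockerfile.cpu ." resolves.
# CORE_RANGE, OMP_CORE_RANGE, and NUMA_NODE fall back to 0-16 / node 0 when unset;
# HF_TOKEN is forwarded into the container so the gated meta-llama/Llama-3.2-3B-Instruct
# model used by the serving test can be downloaded.
export HF_TOKEN="hf_..."    # placeholder; substitute a real Hugging Face token
CORE_RANGE=0-15 OMP_CORE_RANGE=0-15 NUMA_NODE=0 \
    bash .buildkite/scripts/hardware_ci/run-cpu-test-arm.sh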