From e2f56c309d2a28899c68975a7e104502d56deb8f Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Fri, 28 Nov 2025 21:37:54 +0800 Subject: [PATCH] [CPU] Update torch 2.9.1 for CPU backend (#29664) Signed-off-by: jiang1.li --- .buildkite/scripts/hardware_ci/run-cpu-test.sh | 4 ++-- csrc/cpu/utils.cpp | 13 +++++++------ docker/Dockerfile.cpu | 2 +- requirements/cpu-build.txt | 5 ++--- requirements/cpu.txt | 15 ++++----------- vllm/platforms/cpu.py | 1 - 6 files changed, 16 insertions(+), 24 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test.sh b/.buildkite/scripts/hardware_ci/run-cpu-test.sh index 2267718f75ca5..438fe522c8702 100644 --- a/.buildkite/scripts/hardware_ci/run-cpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-cpu-test.sh @@ -21,8 +21,8 @@ trap remove_docker_container EXIT remove_docker_container # Try building the docker image -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$NUMA_NODE" --target vllm-test -f docker/Dockerfile.cpu . -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$NUMA_NODE"-avx2 --target vllm-test -f docker/Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --progress plain --tag cpu-test-"$NUMA_NODE" --target vllm-test -f docker/Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --progress plain --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$NUMA_NODE"-avx2 --target vllm-test -f docker/Dockerfile.cpu . # Run the image, setting --shm-size=4g for tensor parallel. docker run -itd --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=16 --env VLLM_CPU_CI_ENV=1 -e E2E_OMP_THREADS="$OMP_CORE_RANGE" --shm-size=4g --name cpu-test-"$NUMA_NODE" cpu-test-"$NUMA_NODE" diff --git a/csrc/cpu/utils.cpp b/csrc/cpu/utils.cpp index 5199ba2af024f..3dacfc7b2b7a3 100644 --- a/csrc/cpu/utils.cpp +++ b/csrc/cpu/utils.cpp @@ -51,12 +51,13 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) { if (node_id != -1) { node_ids.insert(node_id); } - TORCH_WARN(node_id == mem_node_id, "CPU ", cpu_id, " is on NUMA node ", - node_id, ", but CPU ", omp_cpu_ids.front(), - " is on NUMA node ", mem_node_id, - ". All CPUs should be on the same NUMA node for optimal " - "performance. Memory will be bound to NUMA node ", - mem_node_id, "."); + if (node_id != mem_node_id) { + TORCH_WARN("CPU ", cpu_id, " is on NUMA node ", node_id, ", but CPU ", + omp_cpu_ids.front(), " is on NUMA node ", mem_node_id, + ". All CPUs should be on the same NUMA node for optimal " + "performance. Memory will be bound to NUMA node ", + mem_node_id, "."); + } } // Concatenate all node_ids into a single comma-separated string if (!node_ids.empty()) { diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index eb3807ef0ca4e..8d55ecfba3e52 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -132,7 +132,7 @@ RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \ esac; \ }; \ remove_packages_not_supported_on_aarch64 && \ - sed -i 's/^torch==.*/torch==2.8.0/g' requirements/cpu-test.in && \ + sed -i 's/^torch==.*/torch==2.9.1/g' requirements/cpu-test.in && \ sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \ sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \ uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu diff --git a/requirements/cpu-build.txt b/requirements/cpu-build.txt index 81d429a5e5f8d..e18e0825fc428 100644 --- a/requirements/cpu-build.txt +++ b/requirements/cpu-build.txt @@ -4,9 +4,8 @@ packaging>=24.2 setuptools>=77.0.3,<81.0.0 setuptools-scm>=8 --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.8.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" -torch==2.9.0; platform_system == "Darwin" -torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64" +torch==2.9.1+cpu; platform_machine == "x86_64" or platform_machine == "s390x" +torch==2.9.1; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "aarch64" scons; platform_machine == "aarch64" # needed to build Arm Compute Library (ACL) wheel jinja2>=3.1.6 diff --git a/requirements/cpu.txt b/requirements/cpu.txt index e23d3286f3f78..21571be479c83 100644 --- a/requirements/cpu.txt +++ b/requirements/cpu.txt @@ -4,25 +4,18 @@ numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative decoding # Dependencies for CPUs -packaging>=24.2 -setuptools>=77.0.3,<81.0.0 --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.8.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" -torch==2.9.0; platform_system == "Darwin" -torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64" +torch==2.9.1+cpu; platform_machine == "x86_64" or platform_machine == "s390x" +torch==2.9.1; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "aarch64" # required for the image processor of minicpm-o-2_6, this must be updated alongside torch -torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x" -torchaudio==2.8.0; platform_machine == "ppc64le" +torchaudio; platform_machine != "s390x" # required for the image processor of phi3v, this must be updated alongside torch -torchvision; platform_machine != "ppc64le" and platform_machine != "s390x" -torchvision==0.23.0; platform_machine == "ppc64le" -datasets # for benchmark scripts +torchvision; platform_machine != "s390x" # Intel Extension for PyTorch, only for x86_64 CPUs intel-openmp==2024.2.1; platform_machine == "x86_64" -triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile. # Use this to gather CPU info and optimize based on ARM Neoverse cores py-cpuinfo; platform_machine == "aarch64" diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index 5f9561366e0d5..2b2c2f9cdc571 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -384,7 +384,6 @@ class CpuPlatform(Platform): @classmethod def is_pin_memory_available(cls) -> bool: - logger.warning("Pin memory is not supported on CPU.") return False @classmethod