diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake
index 9bac5ea41c8d4..4b8f0daacb007 100644
--- a/cmake/cpu_extension.cmake
+++ b/cmake/cpu_extension.cmake
@@ -188,16 +188,47 @@ else()
     message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
 endif()
 
-#
-# Build oneDNN for W8A8 GEMM kernels (only for x86-AVX512 /ARM platforms)
-# Flag to enable ACL kernels for AARCH64 platforms
-if (VLLM_BUILD_ACL STREQUAL "ON")
-    set(USE_ACL ON)
-else()
-    set(USE_ACL OFF)
-endif()
-
+# Build oneDNN for GEMM kernels (only for x86-AVX512 /ARM platforms)
 if ((AVX512_FOUND AND NOT AVX512_DISABLED) OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
+    # Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
+    # TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
+    if(ASIMD_FOUND)
+        if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
+            message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
+        else()
+            message(STATUS "Downloading Arm Compute Library (ACL) from GitHub")
+            FetchContent_Populate(arm_compute
+                SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-subbuild"
+                SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-src"
+                GIT_REPOSITORY https://github.com/ARM-software/ComputeLibrary.git
+                GIT_TAG v52.2.0
+                GIT_SHALLOW TRUE
+                GIT_PROGRESS TRUE
+            )
+            set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
+        endif()
+
+        # Build ACL with scons
+        include(ProcessorCount)
+        ProcessorCount(_NPROC)
+        execute_process(
+            COMMAND scons -j${_NPROC}
+                    Werror=0 debug=0 neon=1 examples=0 embed_kernels=0 os=linux
+                    arch=armv8.2-a build=native benchmark_examples=0 fixed_format_kernels=1
+                    multi_isa=1 openmp=1 cppthreads=0
+            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
+            RESULT_VARIABLE _acl_rc
+        )
+        if(NOT _acl_rc EQUAL 0)
+            message(FATAL_ERROR "ACL SCons build failed (exit ${_acl_rc}).")
+        endif()
+
+        set(ONEDNN_AARCH64_USE_ACL "ON")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-rpath,$ENV{ACL_ROOT_DIR}/build/")
+        add_compile_definitions(VLLM_USE_ACL)
+    endif()
+
     set(FETCHCONTENT_SOURCE_DIR_ONEDNN "$ENV{FETCHCONTENT_SOURCE_DIR_ONEDNN}" CACHE PATH "Path to a local oneDNN source directory.")
     if(FETCHCONTENT_SOURCE_DIR_ONEDNN)
@@ -217,16 +248,6 @@ if ((AVX512_FOUND AND NOT AVX512_DISABLED) OR (ASIMD_FOUND AND NOT APPLE_SILICON
         )
     endif()
 
-    if(USE_ACL)
-        find_library(ARM_COMPUTE_LIBRARY NAMES arm_compute PATHS $ENV{ACL_ROOT_DIR}/build/)
-        if(NOT ARM_COMPUTE_LIBRARY)
-            message(FATAL_ERROR "Could not find ARM Compute Library: please set ACL_ROOT_DIR")
-        endif()
-        set(ONEDNN_AARCH64_USE_ACL "ON")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-rpath,$ENV{ACL_ROOT_DIR}/build/")
-        add_compile_definitions(VLLM_USE_ACL)
-    endif()
-
     set(ONEDNN_LIBRARY_TYPE "STATIC")
     set(ONEDNN_BUILD_DOC "OFF")
     set(ONEDNN_BUILD_EXAMPLES "OFF")
diff --git a/requirements/cpu-build.txt b/requirements/cpu-build.txt
index b511b0f5d31b3..bba7bc7a4d8c4 100644
--- a/requirements/cpu-build.txt
+++ b/requirements/cpu-build.txt
@@ -6,6 +6,7 @@ setuptools-scm>=8
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.8.0+cpu; platform_machine == "x86_64"
 torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64" or platform_system == "Darwin"
+scons; platform_machine == "aarch64" # needed to build Arm Compute Library (ACL) wheel
 jinja2>=3.1.6
 regex
diff --git a/vllm/model_executor/layers/utils.py b/vllm/model_executor/layers/utils.py
index c1a48fa200ca0..e53d97f7fcf99 100644
--- a/vllm/model_executor/layers/utils.py
+++ b/vllm/model_executor/layers/utils.py
@@ -8,7 +8,7 @@
 import torch
 
 from vllm import _custom_ops as ops
 from vllm import envs
-from vllm.platforms import CpuArchEnum, current_platform
+from vllm.platforms import current_platform
 from vllm.utils.torch_utils import direct_register_custom_op
 
@@ -178,10 +178,7 @@ def dispatch_cpu_unquantized_gemm(
         )
         if remove_weight:
             layer.weight = torch.nn.Parameter(torch.empty(0), requires_grad=False)
-    elif ops._supports_onednn and (
-        current_platform.get_cpu_architecture() == CpuArchEnum.X86
-        or ops.is_onednn_acl_supported()
-    ):
+    elif ops._supports_onednn:
         origin_weight = layer.weight
         if remove_weight:
             layer.weight = torch.nn.Parameter(torch.empty(0), requires_grad=False)
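
Note: below is a minimal Python sketch (not vLLM's actual implementation) of what the simplified condition in dispatch_cpu_unquantized_gemm now means: once the extension is built with oneDNN support, the oneDNN GEMM path is taken on x86 and AArch64 alike, so the CpuArchEnum.X86 / ops.is_onednn_acl_supported() check is no longer needed. The helper name pick_cpu_gemm_backend is hypothetical and the sketch omits the AMX/SGL-kernel branch that precedes the elif in the hunk above.

# Minimal sketch, assuming a vLLM CPU build; names other than
# ops._supports_onednn are illustrative only.
from vllm import _custom_ops as ops


def pick_cpu_gemm_backend() -> str:
    """Report which unquantized CPU GEMM path would be dispatched."""
    if ops._supports_onednn:
        # Previously this branch also required an x86 CPU or an ACL-enabled
        # oneDNN build; ACL is now built into the AArch64 extension, so the
        # extra architecture check is dropped.
        return "onednn"
    # Otherwise vLLM falls back to its default unquantized GEMM path.
    return "default"


if __name__ == "__main__":
    print(pick_cpu_gemm_backend())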