diff --git a/csrc/cpu/utils.hpp b/csrc/cpu/utils.hpp
index d8399c56f6af8..d3def306b8069 100644
--- a/csrc/cpu/utils.hpp
+++ b/csrc/cpu/utils.hpp
@@ -6,6 +6,10 @@
 #include
 #include
 
+#if defined(__APPLE__)
+  #include <sys/sysctl.h>
+#endif
+
 #include "cpu_types.hpp"
 
 namespace cpu_utils {
@@ -21,10 +25,12 @@ struct VecTypeTrait {
   using vec_t = vec_op::FP32Vec16;
 };
 
+#if !defined(__aarch64__) || defined(ARM_BF16_SUPPORT)
 template <>
 struct VecTypeTrait {
   using vec_t = vec_op::BF16Vec16;
 };
+#endif
 
 template <>
 struct VecTypeTrait {
@@ -44,8 +50,27 @@ struct Counter {
+// Returns the L2 cache budget in bytes: half of the L2 size reported by the
+// OS, or half of a 128KB fallback when no positive size is available. The
+// value is computed on the first call and cached.
 inline int64_t get_l2_size() {
-  static int64_t size = []() {
+  static int64_t size = []() -> int64_t {
+    // Assume a 128KB L2 cache when the OS cannot report a usable size.
+    constexpr int64_t fallback_l2_cache_size = 128LL * 1024;
+#if defined(__APPLE__)
+    // macOS doesn't have _SC_LEVEL2_CACHE_SIZE. Use sysctlbyname.
+    int64_t l2_cache_size = 0;
+    size_t len = sizeof(l2_cache_size);
+    if (sysctlbyname("hw.l2cachesize", &l2_cache_size, &len, nullptr, 0) != 0) {
+      l2_cache_size = 0;  // query failed; use the fallback below
+    }
+#else
+    // The assert is compiled out under NDEBUG, so the check below also covers
+    // a failed or zero-sized sysconf result.
     long l2_cache_size = sysconf(_SC_LEVEL2_CACHE_SIZE);
     assert(l2_cache_size != -1);
+#endif
+    if (l2_cache_size <= 0) {
+      l2_cache_size = fallback_l2_cache_size;
+    }
     return l2_cache_size >> 1;  // use 50% of L2 cache
   }();
   return size;
 }