[CPU] Disable oneDNN linear on non-x86 platforms (#25166)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
2026-01-28 13:27:15 +08:00 · 2025-09-19 15:27:22 +08:00 · 2025-09-19 15:27:22 +08:00 · 8c1d4acbfe
commit 8c1d4acbfe
parent 486c5599e3
1 changed file with 3 additions and 2 deletions
--- a/vllm/model_executor/layers/utils.py
+++ b/vllm/model_executor/layers/utils.py
@@ -7,7 +7,7 @@ import torch

 from vllm import _custom_ops as ops
 from vllm import envs
-from vllm.platforms import current_platform
+from vllm.platforms import CpuArchEnum, current_platform
 from vllm.utils import direct_register_custom_op


@@ -167,7 +167,8 @@ def dispatch_cpu_unquantized_gemm(
        if remove_weight:
            layer.weight = torch.nn.Parameter(torch.empty(0),
                                              requires_grad=False)
-    elif ops._supports_onednn:
+    elif (ops._supports_onednn
+          and current_platform.get_cpu_architecture() == CpuArchEnum.X86):
        origin_weight = layer.weight
        if remove_weight:
            layer.weight = torch.nn.Parameter(torch.empty(0),