mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 05:15:42 +08:00
[CPU] Disable oneDNN linear on non-x86 platforms (#25166)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
parent
486c5599e3
commit
8c1d4acbfe
@@ -7,7 +7,7 @@ import torch
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm import envs
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.platforms import CpuArchEnum, current_platform
|
||||
from vllm.utils import direct_register_custom_op
|
||||
|
||||
|
||||
@@ -167,7 +167,8 @@ def dispatch_cpu_unquantized_gemm(
|
||||
if remove_weight:
|
||||
layer.weight = torch.nn.Parameter(torch.empty(0),
|
||||
requires_grad=False)
|
||||
elif ops._supports_onednn:
|
||||
elif (ops._supports_onednn
|
||||
and current_platform.get_cpu_architecture() == CpuArchEnum.X86):
|
||||
origin_weight = layer.weight
|
||||
if remove_weight:
|
||||
layer.weight = torch.nn.Parameter(torch.empty(0),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user