From bca55b556f6a0cc7f8d75d5e12205be44eacd381 Mon Sep 17 00:00:00 2001
From: Random Fly
Date: Tue, 20 May 2025 15:54:33 +0800
Subject: [PATCH] [Bugfix] fix adding bias twice in ipex GPTQ quantization
 (#18363)

Signed-off-by: rand-fly
---
 vllm/model_executor/layers/quantization/ipex_quant.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/ipex_quant.py b/vllm/model_executor/layers/quantization/ipex_quant.py
index 8bce6bba460ac..b7baa3d3363bf 100644
--- a/vllm/model_executor/layers/quantization/ipex_quant.py
+++ b/vllm/model_executor/layers/quantization/ipex_quant.py
@@ -181,8 +181,6 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod):
               layer: torch.nn.Module,
               x: torch.Tensor,
               bias: Optional[torch.Tensor] = None) -> torch.Tensor:
         reshaped_x = x.reshape(-1, x.shape[-1])
         out = layer.ipex_qlinear(reshaped_x)
-        if bias is not None:
-            out.add_(bias)
         return out.reshape(x.shape[:-1] + (layer.ipex_output_size, ))
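
Note on the fix: the commit subject implies that layer.ipex_qlinear already
applies the bias internally (it is folded in when IPEX packs the GPTQ
weights), so the removed out.add_(bias) applied the bias a second time.
Below is a minimal sketch of that double addition under this assumption;
FakeIpexQLinear and every name in it are hypothetical stand-ins for
illustration, not the vLLM or IPEX API.

    import torch

    class FakeIpexQLinear:
        """Hypothetical stand-in for layer.ipex_qlinear: the bias is baked
        in at construction, mirroring the assumption stated above."""

        def __init__(self, weight: torch.Tensor, bias: torch.Tensor):
            self.weight = weight
            self.bias = bias

        def __call__(self, x: torch.Tensor) -> torch.Tensor:
            # The bias is applied inside this (stand-in) fused kernel.
            return x @ self.weight + self.bias

    x = torch.randn(4, 8)
    qlinear = FakeIpexQLinear(torch.randn(8, 16), torch.randn(16))

    out = qlinear(x)            # bias already included here
    buggy = out + qlinear.bias  # pre-patch behavior: bias counted twice
    fixed = out                 # post-patch behavior: bias counted once

    # The pre-patch output is off by exactly one extra bias term.
    assert torch.allclose(buggy - fixed, qlinear.bias.expand_as(out))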