From 0df25101d6dc3b83edeb813534b2d0931c79431a Mon Sep 17 00:00:00 2001
From: rainkert <93575312+rainkert@users.noreply.github.com>
Date: Wed, 5 Mar 2025 12:25:53 +0800
Subject: [PATCH] [Bugfix] Fix gptq_marlin for deepseek-v3 (#13750)

Signed-off-by: dangshunya
Co-authored-by: dangshunya
---
 vllm/model_executor/layers/quantization/gptq_marlin.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py
index 21db8ccba059c..9f53ffc1d7f6a 100644
--- a/vllm/model_executor/layers/quantization/gptq_marlin.py
+++ b/vllm/model_executor/layers/quantization/gptq_marlin.py
@@ -569,7 +569,9 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
         replace_parameter(layer, "w13_scales", marlin_w13_scales)
         marlin_w2_scales = marlin_moe_permute_scales(
             s=layer.w2_scales,
-            size_k=layer.w2_scales.shape[1] * self.quant_config.pack_factor,
+            size_k=layer.w2_scales.shape[1] *
+            (self.quant_config.group_size if self.quant_config.group_size != -1
+             else self.quant_config.pack_factor),
             size_n=layer.w2_scales.shape[2],
             group_size=self.quant_config.group_size,
         )