[Bugfix] Fix gptq_marlin for deepseek-v3 (#13750)

Signed-off-by: dangshunya <dangshunya@baichuan-inc.com>
Co-authored-by: dangshunya <dangshunya@baichuan-inc.com>
2026-06-26 12:17:12 +08:00 · 2025-03-05 12:25:53 +08:00 · 2025-03-05 12:25:53 +08:00 · 0df25101d6
commit 0df25101d6
parent e123aafdf0
1 changed file with 3 additions and 1 deletion
--- a/vllm/model_executor/layers/quantization/gptq_marlin.py
+++ b/vllm/model_executor/layers/quantization/gptq_marlin.py
@@ -569,7 +569,9 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
        replace_parameter(layer, "w13_scales", marlin_w13_scales)
        marlin_w2_scales = marlin_moe_permute_scales(
            s=layer.w2_scales,
-            size_k=layer.w2_scales.shape[1] * self.quant_config.pack_factor,
+            size_k=layer.w2_scales.shape[1] *
+            (self.quant_config.group_size if self.quant_config.group_size != -1
+             else self.quant_config.pack_factor),
            size_n=layer.w2_scales.shape[2],
            group_size=self.quant_config.group_size,
        )