[Bugfix] Fix gptq_marlin for deepseek-v3 (#13750)

Signed-off-by: dangshunya <dangshunya@baichuan-inc.com>
Co-authored-by: dangshunya <dangshunya@baichuan-inc.com>
This commit is contained in:
rainkert 2025-03-05 12:25:53 +08:00 committed by GitHub
parent e123aafdf0
commit 0df25101d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -569,7 +569,9 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
replace_parameter(layer, "w13_scales", marlin_w13_scales)
marlin_w2_scales = marlin_moe_permute_scales(
s=layer.w2_scales,
-size_k=layer.w2_scales.shape[1] * self.quant_config.pack_factor,
+size_k=layer.w2_scales.shape[1] *
+(self.quant_config.group_size if self.quant_config.group_size != -1
+else self.quant_config.pack_factor),
size_n=layer.w2_scales.shape[2],
group_size=self.quant_config.group_size,
)