mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-01 21:05:30 +08:00
[Bugfix] Fix gptq_marlin for deepseek-v3 (#13750)
Signed-off-by: dangshunya <dangshunya@baichuan-inc.com>
Co-authored-by: dangshunya <dangshunya@baichuan-inc.com>
This commit is contained in:
parent
e123aafdf0
commit
0df25101d6
@@ -569,7 +569,9 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
|
||||
replace_parameter(layer, "w13_scales", marlin_w13_scales)
|
||||
marlin_w2_scales = marlin_moe_permute_scales(
|
||||
s=layer.w2_scales,
|
||||
- size_k=layer.w2_scales.shape[1] * self.quant_config.pack_factor,
|
||||
+ size_k=layer.w2_scales.shape[1] *
|
||||
+ (self.quant_config.group_size if self.quant_config.group_size != -1
|
||||
+ else self.quant_config.pack_factor),
|
||||
size_n=layer.w2_scales.shape[2],
|
||||
group_size=self.quant_config.group_size,
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user