mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 23:07:10 +08:00
[Bugfix] Fix gptq_marlin for deepseek-v3 (#13750)
Signed-off-by: dangshunya <dangshunya@baichuan-inc.com>
Co-authored-by: dangshunya <dangshunya@baichuan-inc.com>
This commit is contained in:
parent
e123aafdf0
commit
0df25101d6
@@ -569,7 +569,9 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
         replace_parameter(layer, "w13_scales", marlin_w13_scales)
         marlin_w2_scales = marlin_moe_permute_scales(
             s=layer.w2_scales,
-            size_k=layer.w2_scales.shape[1] * self.quant_config.pack_factor,
+            size_k=layer.w2_scales.shape[1] *
+            (self.quant_config.group_size if self.quant_config.group_size != -1
+             else self.quant_config.pack_factor),
             size_n=layer.w2_scales.shape[2],
             group_size=self.quant_config.group_size,
         )
Loading…
x
Reference in New Issue
Block a user