mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 16:35:55 +08:00
Fix CompressedTensorsWNA16MoE with grouped scales (#13769)
This commit is contained in:
parent
18e505930d
commit
4d251ad00e
@@ -527,7 +527,8 @@ class CompressedTensorsWNA16MoEMethod(CompressedTensorsMoEMethod):
         replace_tensor("w13_weight_scale", marlin_w13_scales)
         marlin_w2_scales = marlin_moe_permute_scales(
             layer.w2_weight_scale,
-            layer.w2_weight_scale.shape[1] * self.packed_factor,
+            layer.w2_weight_scale.shape[1] *
+            (self.group_size if self.group_size != -1 else self.packed_factor),
             size_k2,
             self.group_size,
             self.num_bits,
Loading…
x
Reference in New Issue
Block a user