mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-25 10:16:32 +08:00
Fix: Correct FusedMoE layer reference in auto_round quantization (#24818)
Signed-off-by: David-Wen <18927700430@163.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent d0a1364188
commit 9da51c77a9
@@ -241,7 +241,7 @@ class AutoRoundConfig(QuantizationConfig):
|
||||
|
||||
if isinstance(layer, FusedMoE):
|
||||
if use_marlin:
|
||||
- return AWQMoEMethod(quant_args_marlin, layer.moe)
+ return AWQMoEMethod(quant_args_marlin, layer.moe_config)
|
||||
from vllm.model_executor.layers.quantization.moe_wna16 import (
|
||||
MoeWNA16Config)
|
||||
|
||||
@@ -327,7 +327,7 @@ class AutoRoundConfig(QuantizationConfig):
|
||||
|
||||
if isinstance(layer, FusedMoE):
|
||||
if use_marlin:
|
||||
- return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe)
+ return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe_config)
|
||||
else:
|
||||
from vllm.model_executor.layers.quantization.moe_wna16 import (
|
||||
MoeWNA16Config)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user