diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index 1ca92273430dd..bf5141fa48943 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -241,7 +241,7 @@ class AutoRoundConfig(QuantizationConfig): if isinstance(layer, FusedMoE): if use_marlin: - return AWQMoEMethod(quant_args_marlin, layer.moe) + return AWQMoEMethod(quant_args_marlin, layer.moe_config) from vllm.model_executor.layers.quantization.moe_wna16 import ( MoeWNA16Config) @@ -327,7 +327,7 @@ class AutoRoundConfig(QuantizationConfig): if isinstance(layer, FusedMoE): if use_marlin: - return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe) + return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe_config) else: from vllm.model_executor.layers.quantization.moe_wna16 import ( MoeWNA16Config)