[Bugfix] Fix AWQ marlin layer skipping (#27416)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py 2025-10-24 02:30:28 +08:00 committed by GitHub
parent 0825197bee
commit 81d5bb765a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -178,7 +178,10 @@ class AWQMarlinConfig(QuantizationConfig):
isinstance(layer, ParallelLMHead) and self.lm_head_quantized
):
if is_layer_skipped(
prefix, self.modules_to_not_convert, self.packed_modules_mapping
prefix,
self.modules_to_not_convert,
self.packed_modules_mapping,
skip_with_substr=True,
):
return UnquantizedLinearMethod()
# Check if the layer is supported by AWQMarlin.
@@ -194,7 +197,11 @@ class AWQMarlinConfig(QuantizationConfig):
elif isinstance(layer, FusedMoE):
from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
if is_layer_skipped(prefix, getattr(self, "modules_to_not_convert", [])):
if is_layer_skipped(
prefix,
getattr(self, "modules_to_not_convert", []),
skip_with_substr=True,
):
return UnquantizedFusedMoEMethod(layer.moe_config)
if not check_moe_marlin_supports_layer(layer, self.group_size):
logger.warning_once(