mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-25 18:01:50 +08:00
[Bugfix] Fix AWQ marlin layer skipping (#27416)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
0825197bee
commit
81d5bb765a
@@ -178,7 +178,10 @@ class AWQMarlinConfig(QuantizationConfig):
|
||||
isinstance(layer, ParallelLMHead) and self.lm_head_quantized
|
||||
):
|
||||
if is_layer_skipped(
|
||||
prefix, self.modules_to_not_convert, self.packed_modules_mapping
|
||||
prefix,
|
||||
self.modules_to_not_convert,
|
||||
self.packed_modules_mapping,
|
||||
skip_with_substr=True,
|
||||
):
|
||||
return UnquantizedLinearMethod()
|
||||
# Check if the layer is supported by AWQMarlin.
|
||||
@@ -194,7 +197,11 @@ class AWQMarlinConfig(QuantizationConfig):
|
||||
elif isinstance(layer, FusedMoE):
|
||||
from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
|
||||
|
||||
if is_layer_skipped(prefix, getattr(self, "modules_to_not_convert", [])):
|
||||
if is_layer_skipped(
|
||||
prefix,
|
||||
getattr(self, "modules_to_not_convert", []),
|
||||
skip_with_substr=True,
|
||||
):
|
||||
return UnquantizedFusedMoEMethod(layer.moe_config)
|
||||
if not check_moe_marlin_supports_layer(layer, self.group_size):
|
||||
logger.warning_once(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user