mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-28 23:27:13 +08:00
[Bugfix] Fix AWQ marlin layer skipping (#27416)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
0825197bee
commit
81d5bb765a
@@ -178,7 +178,10 @@ class AWQMarlinConfig(QuantizationConfig):
|
|||||||
isinstance(layer, ParallelLMHead) and self.lm_head_quantized
|
isinstance(layer, ParallelLMHead) and self.lm_head_quantized
|
||||||
):
|
):
|
||||||
if is_layer_skipped(
|
if is_layer_skipped(
|
||||||
prefix, self.modules_to_not_convert, self.packed_modules_mapping
|
prefix,
|
||||||
|
self.modules_to_not_convert,
|
||||||
|
self.packed_modules_mapping,
|
||||||
|
skip_with_substr=True,
|
||||||
):
|
):
|
||||||
return UnquantizedLinearMethod()
|
return UnquantizedLinearMethod()
|
||||||
# Check if the layer is supported by AWQMarlin.
|
# Check if the layer is supported by AWQMarlin.
|
||||||
@@ -194,7 +197,11 @@ class AWQMarlinConfig(QuantizationConfig):
|
|||||||
elif isinstance(layer, FusedMoE):
|
elif isinstance(layer, FusedMoE):
|
||||||
from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
|
from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
|
||||||
|
|
||||||
if is_layer_skipped(prefix, getattr(self, "modules_to_not_convert", [])):
|
if is_layer_skipped(
|
||||||
|
prefix,
|
||||||
|
getattr(self, "modules_to_not_convert", []),
|
||||||
|
skip_with_substr=True,
|
||||||
|
):
|
||||||
return UnquantizedFusedMoEMethod(layer.moe_config)
|
return UnquantizedFusedMoEMethod(layer.moe_config)
|
||||||
if not check_moe_marlin_supports_layer(layer, self.group_size):
|
if not check_moe_marlin_supports_layer(layer, self.group_size):
|
||||||
logger.warning_once(
|
logger.warning_once(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user