From 0b25498990f01ea2553c02731d6e2ce2d550156a Mon Sep 17 00:00:00 2001 From: haoyangli-amd Date: Fri, 14 Nov 2025 13:56:35 +0800 Subject: [PATCH] [Misc] add ignore mapper for quark quantization (#28275) Signed-off-by: Haoyang Li --- .../layers/quantization/quark/quark.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/quark/quark.py b/vllm/model_executor/layers/quantization/quark/quark.py index 095a66ef10f9..1bb698faf46d 100644 --- a/vllm/model_executor/layers/quantization/quark/quark.py +++ b/vllm/model_executor/layers/quantization/quark/quark.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import fnmatch -from typing import Any, Optional, cast +from typing import TYPE_CHECKING, Any, Optional, cast import torch @@ -34,6 +34,9 @@ from vllm.model_executor.layers.quantization.quark.utils import ( ) from vllm.platforms import current_platform +if TYPE_CHECKING: + from vllm.model_executor.models.utils import WeightsMapper + __all__ = ["QuarkLinearMethod"] logger = init_logger(__name__) @@ -54,6 +57,7 @@ class QuarkConfig(QuantizationConfig): self.kv_cache_group = kv_cache_group self.kv_cache_config = kv_cache_config self.pack_method = pack_method + self.ignore: list[str] = cast(list[str], self.quant_config.get("exclude", [])) def get_linear_method(self) -> "QuarkLinearMethod": return QuarkLinearMethod(self) @@ -74,9 +78,8 @@ class QuarkConfig(QuantizationConfig): from vllm.attention.layer import Attention # Avoid circular import # Check if the layer is skipped for quantization. - exclude_layers = cast(list[str], self.quant_config.get("exclude")) if should_ignore_layer( - prefix, ignore=exclude_layers, fused_mapping=self.packed_modules_mapping + prefix, ignore=self.ignore, fused_mapping=self.packed_modules_mapping ): return UnquantizedLinearMethod() if isinstance(layer, LinearBase): @@ -90,6 +93,9 @@ class QuarkConfig(QuantizationConfig): return QuarkMoEMethod.get_moe_method(self, module=layer, layer_name=prefix) return None + def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"): + self.ignore = hf_to_vllm_mapper.apply_list(self.ignore) + @classmethod def from_config(cls, config: dict[str, Any]) -> "QuarkConfig": export_config = config.get("export")