From 0bb0bae43696d59f8e4d88bd7c6daa992fd31af4 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <106840466+shengliangxu@users.noreply.github.com>
Date: Sun, 14 Dec 2025 02:18:31 -0800
Subject: [PATCH] Nvidia ModelOpt workaround for issue 28072 (#30164)

Signed-off-by: Shengliang Xu
Co-authored-by: Pavani Majety
---
 .../layers/quantization/modelopt.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py
index a3a8ec738dae2..030d85080a34d 100644
--- a/vllm/model_executor/layers/quantization/modelopt.py
+++ b/vllm/model_executor/layers/quantization/modelopt.py
@@ -188,7 +188,24 @@ class ModelOptQuantConfigBase(QuantizationConfig):
 
     def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
         if len(self.exclude_modules) > 0:
-            self.exclude_modules = hf_to_vllm_mapper.apply_list(self.exclude_modules)
+            # This is a workaround for the weights remapping issue:
+            # https://github.com/vllm-project/vllm/issues/28072
+            # Right now, the Nvidia ModelOpt library uses just one wildcard pattern:
+            #     module_path*
+            # It is applied when the whole tree of modules rooted at module_path
+            # is not quantized. Here we replace such a pattern with 2 patterns that
+            # are collectively equivalent to the original pattern:
+            #     module_path
+            #     module_path.*
+            new_exclude_modules = []
+            for exclude in self.exclude_modules:
+                if len(exclude) >= 2 and exclude[-1] == "*" and exclude[-2] != ".":
+                    new_exclude_modules.append(exclude[:-1])
+                    new_exclude_modules.append(exclude[:-1] + ".*")
+                else:
+                    new_exclude_modules.append(exclude)
+
+            self.exclude_modules = hf_to_vllm_mapper.apply_list(new_exclude_modules)
 
     @staticmethod
     def get_config_filenames() -> list[str]:
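
Below is a minimal, standalone sketch (not part of the patch) of the pattern expansion the hunk above performs before the vLLM weights mapper is applied. The helper name expand_exclude_patterns and the example module names are hypothetical, chosen only to illustrate the rewrite of a trailing-wildcard ModelOpt exclude pattern into two collectively equivalent patterns.

# Illustrative sketch only: expand "module_path*" (no dot before "*") into
# "module_path" and "module_path.*", leaving all other patterns unchanged.
def expand_exclude_patterns(exclude_modules: list[str]) -> list[str]:
    expanded: list[str] = []
    for exclude in exclude_modules:
        # A bare trailing wildcard covers the module itself and its whole
        # subtree, so split it into the module path and a dotted wildcard.
        if len(exclude) >= 2 and exclude[-1] == "*" and exclude[-2] != ".":
            expanded.append(exclude[:-1])
            expanded.append(exclude[:-1] + ".*")
        else:
            expanded.append(exclude)
    return expanded


# Example with hypothetical module names:
#   expand_exclude_patterns(["model.layers.0.mlp*", "lm_head"])
#   -> ["model.layers.0.mlp", "model.layers.0.mlp.*", "lm_head"]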