[Misc] Improve BNB loader to handle mixture of sharded and merged weights with same suffix (#11566)

Signed-off-by: Isotr0py <2037008807@qq.com>
Author: Isotr0py, 2024-12-28 03:45:13 +08:00 (committed by GitHub)
parent 0240402c46
commit dde1fa18c9


@@ -1001,8 +1001,11 @@ class BitsAndBytesModelLoader(BaseModelLoader):
                     for sub_name in sub_modules:
                         self.target_modules.append(
                             name.replace(last_name, sub_name))
-                else:
-                    self.target_modules.append(name)
+                # Add original module name even if the module has stacked map,
+                # in case model has a mixture of disk-merged and disk-splitted
+                # weights with same last name.
+                self.target_modules.append(name)
         assert (self.target_modules
                 ), "vllm currently does not support BNB quantization for"
         f" {type(model).__name__}"