diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py
index 38935bc967855..53ba84ea8e754 100644
--- a/vllm/model_executor/layers/quantization/bitsandbytes.py
+++ b/vllm/model_executor/layers/quantization/bitsandbytes.py
@@ -71,9 +71,7 @@ class BitsAndBytesConfig(QuantizationConfig):
 
     @staticmethod
     def get_config_filenames() -> list[str]:
-        return [
-            "adapter_config.json",
-        ]
+        return []
 
     @classmethod
     def from_config(cls, config: dict[str, Any]) -> "BitsAndBytesConfig":
diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py
index ebbb021cad645..068a4e355ff8d 100644
--- a/vllm/model_executor/model_loader/bitsandbytes_loader.py
+++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py
@@ -392,7 +392,8 @@ class BitsAndBytesModelLoader(BaseModelLoader):
 
     def _get_bnb_target_modules(self, model: nn.Module) -> None:
         for name, module in model.named_modules():
-            if isinstance(module, (LinearBase, )):
+            if (isinstance(module, LinearBase) and
+                hasattr(module.quant_method, "quant_config")):
                 if modules_info := self.modules_mapping.get_sub_modules(name):
                     # Map vllm's names to transformers's names.
                     rep_name, sub_modules = modules_info
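
For context, a minimal standalone sketch of the guard added in the second hunk, under the assumption that some linear layers carry a quant_method without a quant_config attribute (e.g. an unquantized fallback). This is not vLLM code; the class names below are illustrative stand-ins.

    # Only linear layers whose quant_method exposes a quant_config are
    # collected as bitsandbytes target modules; others are skipped.

    class UnquantizedLinearMethod:
        pass  # deliberately has no quant_config attribute

    class BnbLinearMethod:
        def __init__(self, quant_config):
            self.quant_config = quant_config

    class LinearBase:
        def __init__(self, quant_method):
            self.quant_method = quant_method

    def collect_bnb_targets(named_modules):
        targets = []
        for name, module in named_modules:
            # Mirrors the new condition: isinstance check plus hasattr guard.
            if (isinstance(module, LinearBase) and
                    hasattr(module.quant_method, "quant_config")):
                targets.append(name)
        return targets

    modules = [
        ("layers.0.gate_up_proj", LinearBase(BnbLinearMethod({"bits": 4}))),
        ("layers.0.down_proj", LinearBase(UnquantizedLinearMethod())),
    ]
    print(collect_bnb_targets(modules))  # ['layers.0.gate_up_proj']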