[Model] support bitsandbytes quantization with minicpm model (#10842)

Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>
This commit is contained in:
zixuanzhang226 2024-12-02 19:06:41 -08:00 committed by GitHub
parent 4433195ab7
commit d746268e92
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -534,6 +534,16 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
}
embedding_padding_modules = ["lm_head"]
# BitsandBytes specific attributes
# Maps each per-shard parameter name in the checkpoint to the fused
# parameter it is stacked into and its shard index within that fused
# weight, so bitsandbytes-quantized weights can be sliced back out of
# the merged qkv_proj / gate_up_proj tensors during loading.
bitsandbytes_stacked_params_mapping = {
    # shard_name, weight_name, index
    "q_proj": ("qkv_proj", 0),
    "k_proj": ("qkv_proj", 1),
    "v_proj": ("qkv_proj", 2),
    "gate_proj": ("gate_up_proj", 0),
    "up_proj": ("gate_up_proj", 1),
}
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
config = vllm_config.model_config.hf_config