[Model] support bitsandbytes quantization with minicpm model (#10842)

Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>
2026-06-11 19:02:16 +08:00 · 2024-12-02 19:06:41 -08:00 · 2024-12-02 19:06:41 -08:00 · d746268e92
commit d746268e92
parent 4433195ab7
1 changed files with 10 additions and 0 deletions
--- a/vllm/model_executor/models/minicpm.py
+++ b/vllm/model_executor/models/minicpm.py
@ -534,6 +534,16 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
    }
    embedding_padding_modules = ["lm_head"]
    # BitsAndBytes-specific attributes.
    # Each entry pairs a shard name with the stacked weight it belongs to
    # and the shard's index inside that stacked weight: the q/k/v
    # projections occupy slots 0, 1, 2 of "qkv_proj", and the gate/up
    # projections occupy slots 0, 1 of "gate_up_proj".
    # NOTE(review): presumably read by the bitsandbytes weight loader to
    # place per-shard checkpoint tensors into the fused parameters;
    # confirm against the loader before changing names or indices.
    bitsandbytes_stacked_params_mapping = {
        # shard_name: (weight_name, index)
        "q_proj": ("qkv_proj", 0),
        "k_proj": ("qkv_proj", 1),
        "v_proj": ("qkv_proj", 2),
        "gate_proj": ("gate_up_proj", 0),
        "up_proj": ("gate_up_proj", 1),
    }
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()
        config = vllm_config.model_config.hf_config