[Model] support bitsandbytes quantization with minicpm3 model (#10682)

Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>
2026-01-05 03:24:03 +08:00 · 2024-11-27 23:58:02 -08:00 · 2024-11-27 23:58:02 -08:00 · 70dc14fbd0
commit 70dc14fbd0
parent cb4e1c3f3a
1 changed files with 6 additions and 0 deletions
--- a/vllm/model_executor/models/minicpm3.py
+++ b/vllm/model_executor/models/minicpm3.py
@@ -241,6 +241,12 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
    # `embedding_modules` and `embedding_padding_modules`
    # are inherited from MiniCPMForCausalLM

+    bitsandbytes_stacked_params_mapping = {
+        # Maps shard_name -> (weight_name, index).
+        "gate_proj": ("gate_up_proj", 0),
+        "up_proj": ("gate_up_proj", 1),
+    }
+
    def _init_model(self, *, vllm_config: VllmConfig, prefix: str = ""):
        self.model = MiniCPM3Model(vllm_config=vllm_config,
                                   prefix=maybe_prefix(prefix, "model"))