From 2dff2e21d928129e985b23897e9f326abe3f1417 Mon Sep 17 00:00:00 2001 From: Benjamin Chislett Date: Thu, 31 Jul 2025 16:33:53 -0400 Subject: [PATCH] [Bugfix] Fix MTP weight loading (#21941) --- vllm/model_executor/models/deepseek_mtp.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py index 911f0036c2dd6..2e026d582a6de 100644 --- a/vllm/model_executor/models/deepseek_mtp.py +++ b/vllm/model_executor/models/deepseek_mtp.py @@ -182,6 +182,8 @@ class DeepSeekMTP(nn.Module, SupportsPP): stacked_params_mapping = [ ("gate_up_proj", "gate_proj", 0), ("gate_up_proj", "up_proj", 1), + ("fused_qkv_a_proj", "q_a_proj", 0), + ("fused_qkv_a_proj", "kv_a_proj_with_mqa", 1), ] expert_params_mapping = FusedMoE.make_expert_params_mapping( @@ -212,6 +214,13 @@ class DeepSeekMTP(nn.Module, SupportsPP): if (("mlp.experts." in name) and name not in params_dict): continue name = name.replace(weight_name, param_name) + + # QKV fusion is optional, fall back to normal + # weight loading if it's not enabled + if ((param_name == "fused_qkv_a_proj") + and name not in params_dict): + continue + # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: continue