From e5ef4dfc11abfc44494963b85ced1c79d1d5efea Mon Sep 17 00:00:00 2001
From: toncao <130689535+toncao@users.noreply.github.com>
Date: Fri, 31 Oct 2025 16:36:37 +0700
Subject: [PATCH] [Kimi-Linear] Correct prefixes and add compatibility to AWQ quants (#27834)

Signed-off-by: toncao
Co-authored-by: toncao
---
 vllm/model_executor/models/kimi_linear.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/kimi_linear.py b/vllm/model_executor/models/kimi_linear.py
index a60a8d764d9d1..f8df72b067dd0 100644
--- a/vllm/model_executor/models/kimi_linear.py
+++ b/vllm/model_executor/models/kimi_linear.py
@@ -155,6 +155,7 @@ class KimiMoE(nn.Module):
                 hidden_act=config.hidden_act,
                 quant_config=quant_config,
                 reduce_results=False,
+                prefix=f"{prefix}.shared_experts",
             )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -340,7 +341,7 @@ class KimiDecoderLayer(nn.Module):
             self.block_sparse_moe = KimiMoE(
                 config=config,
                 quant_config=quant_config,
-                prefix=f"{prefix}.mlp",
+                prefix=f"{prefix}.block_sparse_moe",
             )
             self.mlp = self.block_sparse_moe
         else: