mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 08:45:01 +08:00
[Kimi-Linear] Correct prefixes and add compatibility to AWQ quants (#27834)
Signed-off-by: toncao <cpatonn@gmail.com> Co-authored-by: toncao <cpatonn@gmail.com>
This commit is contained in:
parent
36960501d3
commit
e5ef4dfc11
@@ -155,6 +155,7 @@ class KimiMoE(nn.Module):
|
||||
hidden_act=config.hidden_act,
|
||||
quant_config=quant_config,
|
||||
reduce_results=False,
|
||||
prefix=f"{prefix}.shared_experts",
|
||||
)
|
||||
|
||||
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
||||
@@ -340,7 +341,7 @@ class KimiDecoderLayer(nn.Module):
|
||||
self.block_sparse_moe = KimiMoE(
|
||||
config=config,
|
||||
quant_config=quant_config,
|
||||
- prefix=f"{prefix}.mlp",
|
||||
+ prefix=f"{prefix}.block_sparse_moe",
|
||||
)
|
||||
self.mlp = self.block_sparse_moe
|
||||
else:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user