[Model][Quantization] Fix / Add GGUF support for Qwen2 MoE models (#30307)

Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
Author: Tsukasa OI <floss_llm@irq.a4lg.com>
Date: 2025-12-10 04:13:10 +09:00 (committed by GitHub)
parent b37bf51e75
commit 73a484caa1

vllm/model_executor/models/qwen2_moe.py

@@ -367,6 +367,8 @@ class Qwen2MoeModel(nn.Module):
         self.embed_tokens = VocabParallelEmbedding(
             config.vocab_size,
             config.hidden_size,
+            quant_config=quant_config,
+            prefix=f"{prefix}.embed_tokens",
         )
         self.start_layer, self.end_layer, self.layers = make_layers(
             config.num_hidden_layers,
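
The hunk above threads quant_config and a dotted prefix into
VocabParallelEmbedding so the embedding layer can pick up a GGUF-aware
quantization method for its weights instead of loading them as plain
tensors. A minimal sketch of that plumbing pattern, using hypothetical
stand-in classes rather than vLLM's API:

from typing import Optional

class QuantConfig:
    """Stand-in for a quantization config (hypothetical)."""
    def get_quant_method(self, layer: object, prefix: str) -> str:
        # A real config would return a quant-method object keyed by the
        # layer's fully qualified name; here we just return a tag.
        return f"gguf-dequant:{prefix}"

class Embedding:
    """Stand-in for VocabParallelEmbedding (hypothetical)."""
    def __init__(self, vocab_size: int, hidden_size: int,
                 quant_config: Optional[QuantConfig] = None,
                 prefix: str = ""):
        # Without quant_config the layer falls back to unquantized
        # loading, which is what the fix above avoids for GGUF models.
        self.quant_method = (
            quant_config.get_quant_method(self, prefix)
            if quant_config is not None else None
        )

emb = Embedding(151_936, 2048,
                quant_config=QuantConfig(),
                prefix="model.embed_tokens")
print(emb.quant_method)  # gguf-dequant:model.embed_tokens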
@@ -512,6 +514,12 @@ class Qwen2MoeModel(nn.Module):
                         continue
                     else:
                         name = remapped_kv_scale_name
+                # GGUF: make sure that shared_expert_gate is a 2D tensor.
+                if (
+                    "mlp.shared_expert_gate" in name
+                    and len(loaded_weight.shape) == 1
+                ):
+                    loaded_weight = loaded_weight[None, :]
                 param = params_dict[name]
                 weight_loader = getattr(
                     param, "weight_loader", default_weight_loader