[Model] fix glm4_moe_mtp load weights with GLM-4.6 checkpoint. (#27597)

Signed-off-by: wuao.scotty <wuao.scotty@bytedance.com> Co-authored-by: wuao.scotty <wuao.scotty@bytedance.com>
2025-12-15 17:25:01 +08:00 · 2025-11-12 17:14:00 +07:00 · 2025-11-12 17:14:00 +07:00 · d3ade61e42
commit d3ade61e42
parent 1761dea1a8
1 changed files with 11 additions and 4 deletions
--- a/vllm/model_executor/models/glm4_moe_mtp.py
+++ b/vllm/model_executor/models/glm4_moe_mtp.py
@@ -256,7 +256,14 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
        params_dict = dict(self.named_parameters())
        loaded_params: set[str] = set()
        spec_layer = self.model.mtp_start_layer_idx
        for name, loaded_weight in weights:
            if name == "lm_head.weight":
                name = f"model.layers.{spec_layer}.shard_head.head.weight"
            elif name == "model.embed_tokens.weight":
                # Same name as in the local model; no rewriting needed.
                pass
            else:
                spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
                if spec_layer is None:
                    continue