diff --git a/vllm/model_executor/models/glm4_moe_mtp.py b/vllm/model_executor/models/glm4_moe_mtp.py
index 110ed0a646334..e34ae6c85a4f8 100644
--- a/vllm/model_executor/models/glm4_moe_mtp.py
+++ b/vllm/model_executor/models/glm4_moe_mtp.py
@@ -256,13 +256,12 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
-        spec_layer = self.model.mtp_start_layer_idx
         for name, loaded_weight in weights:
             if name == "lm_head.weight":
-                name = f"model.layers.{spec_layer}.shard_head.head.weight"
+                spec_layer = self.model.mtp_start_layer_idx
+                name = f"model.layers.{spec_layer}.shared_head.head.weight"
             elif name == "model.embed_tokens.weight":
-                # This name is same with local model, rewriting is not needed.
-                pass
+                spec_layer = self.model.mtp_start_layer_idx
             else:
                 spec_layer = get_spec_layer_idx_from_weight_name(
                     self.config, name)
                 if spec_layer is None:
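
For context, a minimal standalone sketch of the corrected remapping logic. The diff does two things: it fixes the `shard_head` -> `shared_head` typo in the renamed lm_head parameter, and it resolves `spec_layer` inside each branch rather than once before the loop, so a value written by an earlier `else`-branch iteration can no longer leak into the `lm_head.weight` case. The `remap` helper, the example `mtp_start_layer_idx` value, and the regex stand-in for vLLM's `get_spec_layer_idx_from_weight_name` below are illustrative assumptions, not the actual vLLM APIs:

```python
import re

def get_spec_layer_idx(name: str, mtp_start: int, num_mtp: int) -> int | None:
    # Illustrative stand-in: return the layer index if the weight name
    # addresses an MTP spec layer, else None. The regex and the layer
    # range check are assumptions for this demo.
    m = re.match(r"model\.layers\.(\d+)\.", name)
    if m and mtp_start <= int(m.group(1)) < mtp_start + num_mtp:
        return int(m.group(1))
    return None

def remap(name: str, mtp_start: int, num_mtp: int) -> tuple[str, int] | None:
    # Mirrors the fixed branch structure: spec_layer is computed inside
    # every branch instead of being preset once before the loop.
    if name == "lm_head.weight":
        spec_layer = mtp_start
        # Fixed typo: "shard_head" -> "shared_head".
        return f"model.layers.{spec_layer}.shared_head.head.weight", spec_layer
    elif name == "model.embed_tokens.weight":
        # Name already matches the local model; only spec_layer is needed.
        return name, mtp_start
    else:
        spec_layer = get_spec_layer_idx(name, mtp_start, num_mtp)
        if spec_layer is None:
            return None  # not an MTP weight; the loader skips it
        return name, spec_layer

# e.g. with a hypothetical mtp_start_layer_idx of 46:
# ('model.layers.46.shared_head.head.weight', 46)
print(remap("lm_head.weight", 46, 1))
```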