mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 18:06:03 +08:00
[Model] fix glm4_moe_mtp load weights with GLM-4.6 checkpoint. (#27597)
Signed-off-by: wuao.scotty <wuao.scotty@bytedance.com> Co-authored-by: wuao.scotty <wuao.scotty@bytedance.com>
This commit is contained in:
parent
1761dea1a8
commit
d3ade61e42
@ -256,11 +256,18 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
|
|||||||
|
|
||||||
params_dict = dict(self.named_parameters())
|
params_dict = dict(self.named_parameters())
|
||||||
loaded_params: set[str] = set()
|
loaded_params: set[str] = set()
|
||||||
|
spec_layer = self.model.mtp_start_layer_idx
|
||||||
for name, loaded_weight in weights:
|
for name, loaded_weight in weights:
|
||||||
spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
|
if name == "lm_head.weight":
|
||||||
if spec_layer is None:
|
name = f"model.layers.{spec_layer}.shard_head.head.weight"
|
||||||
continue
|
elif name == "model.embed_tokens.weight":
|
||||||
name = self._rewrite_spec_layer_name(spec_layer, name)
|
# This name is the same as in the local model, so rewriting is not needed.
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
|
||||||
|
if spec_layer is None:
|
||||||
|
continue
|
||||||
|
name = self._rewrite_spec_layer_name(spec_layer, name)
|
||||||
for param_name, weight_name, shard_id in stacked_params_mapping:
|
for param_name, weight_name, shard_id in stacked_params_mapping:
|
||||||
# Skip non-stacked layers and experts (experts handled below).
|
# Skip non-stacked layers and experts (experts handled below).
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user