mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-06 18:09:10 +08:00
[Bugfix] Fix DeepSeek R1 MTP weight loading (#29545)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Benjamin Chislett <bchislett@nvidia.com>
This commit is contained in:
parent
60c3d413af
commit
51c57b51dd
@@ -346,11 +346,16 @@ class DeepSeekMTP(nn.Module, SupportsPP, DeepseekV2MixtureOfExperts):
|
|||||||
# Use expert_params_mapping to locate the destination
|
# Use expert_params_mapping to locate the destination
|
||||||
# param and delegate to its expert-aware weight_loader
|
# param and delegate to its expert-aware weight_loader
|
||||||
# with expert_id.
|
# with expert_id.
|
||||||
|
is_expert_weight = False
|
||||||
for mapping in expert_params_mapping:
|
for mapping in expert_params_mapping:
|
||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in chunk_name:
|
if weight_name not in chunk_name:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Anyway, this is an expert weight and should not be
|
||||||
|
# attempted to load as other weights later
|
||||||
|
is_expert_weight = True
|
||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = chunk_name.replace(weight_name, param_name)
|
name_mapped = chunk_name.replace(weight_name, param_name)
|
||||||
@@ -377,6 +382,12 @@ class DeepSeekMTP(nn.Module, SupportsPP, DeepseekV2MixtureOfExperts):
|
|||||||
loaded_params.add(name_mapped)
|
loaded_params.add(name_mapped)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
if is_expert_weight:
|
||||||
|
# We've checked that this is an expert weight
|
||||||
|
# However it's not mapped locally to this rank
|
||||||
|
# So we simply skip it
|
||||||
|
continue
|
||||||
|
|
||||||
# Skip loading extra bias for GPTQ models.
|
# Skip loading extra bias for GPTQ models.
|
||||||
if name.endswith(".bias") and name not in params_dict:
|
if name.endswith(".bias") and name not in params_dict:
|
||||||
continue
|
continue
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user