mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-09 07:04:33 +08:00
Merge b1139f3484269217d41c0bc6ca794526f81a8f5b into 254f6b986720c92ddf97fbb1a6a6465da8e87e29
This commit is contained in:
commit
fdab8ce4b0
@ -1366,6 +1366,10 @@ class FusedMoE(CustomOp):
|
|||||||
def load_weights(
|
def load_weights(
|
||||||
self, weights: Iterable[tuple[str, torch.Tensor]]
|
self, weights: Iterable[tuple[str, torch.Tensor]]
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
|
|
||||||
if (expert_mapping := self.expert_mapping) is None:
|
if (expert_mapping := self.expert_mapping) is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"`self.expert_mapping` must be provided to "
|
"`self.expert_mapping` must be provided to "
|
||||||
@ -1376,7 +1380,10 @@ class FusedMoE(CustomOp):
|
|||||||
for param_name, weight_name, expert_id, shard_id in expert_mapping:
|
for param_name, weight_name, expert_id, shard_id in expert_mapping:
|
||||||
if weight_name not in qual_name:
|
if weight_name not in qual_name:
|
||||||
continue
|
continue
|
||||||
weight_name = qual_name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
weight_name = remap_expert_weight_name(
|
||||||
|
qual_name, weight_name, param_name
|
||||||
|
)
|
||||||
param_name = weight_name.removeprefix(f"{self.layer_name}.")
|
param_name = weight_name.removeprefix(f"{self.layer_name}.")
|
||||||
param = getattr(self, param_name)
|
param = getattr(self, param_name)
|
||||||
success = self.weight_loader(
|
success = self.weight_loader(
|
||||||
|
|||||||
@ -1178,3 +1178,38 @@ def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None:
|
|||||||
|
|
||||||
# If there were no matches, return the untouched param name
|
# If there were no matches, return the untouched param name
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
def remap_expert_weight_name(
    name: str,
    weight_name: str,
    param_name: str,
) -> str:
    """Remap expert weight names, handling base_layer prefix for LoRA.

    When loading expert weights, this function maps from checkpoint weight
    names to model parameter names. It handles the special case where
    LoRA wraps the original layer with a `base_layer` prefix.

    For example:
    - Input: name="model.layers.0.mlp.experts.0.up_proj.base_layer.weight"
             weight_name="experts.0.up_proj."
             param_name="experts.w13_"
    - Output: "model.layers.0.mlp.experts.base_layer.w13_weight"

    Args:
        name: The full checkpoint weight name.
        weight_name: The weight name pattern to match (e.g., "experts.0.up_proj.").
        param_name: The parameter name to substitute (e.g., "experts.w13_").

    Returns:
        The remapped weight name with proper base_layer handling.
    """
    base = "base_layer"
    # Split around the first occurrence of the matched pattern.
    head, _, tail = name.partition(weight_name)
    if tail.startswith(f"{base}."):
        # LoRA case: the checkpoint carries a `base_layer.` segment right
        # after the matched pattern. Move it inside the substituted param
        # name (after its first dotted component) and drop it from the tail.
        pieces = param_name.split(".", 1)
        pieces.insert(1, base)
        substituted = ".".join(pieces)
        tail = tail.removeprefix(f"{base}.")
    else:
        # Plain case: straight substitution, equivalent to str.replace on
        # the first occurrence.
        substituted = param_name
    return head + substituted + tail
|
||||||
|
|||||||
@ -36,6 +36,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
|
from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
|
||||||
from vllm.model_executor.models.llama import LlamaMLP as AfmoeMLP
|
from vllm.model_executor.models.llama import LlamaMLP as AfmoeMLP
|
||||||
@ -533,7 +534,10 @@ class AfmoeModel(nn.Module):
|
|||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -38,7 +38,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.model_executor.utils import set_weight_attrs
|
from vllm.model_executor.utils import set_weight_attrs
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
@ -609,7 +612,8 @@ class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant):
|
|||||||
for param_name, weight_name, shard_id in expert_params_mapping:
|
for param_name, weight_name, shard_id in expert_params_mapping:
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
|
|||||||
@ -55,7 +55,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
from .interfaces import SupportsLoRA, SupportsPP
|
from .interfaces import SupportsLoRA, SupportsPP
|
||||||
@ -524,7 +527,8 @@ class BailingMoeModel(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -31,6 +31,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -411,7 +412,8 @@ class DbrxModel(nn.Module):
|
|||||||
for param_name, weight_name in expert_params_mapping:
|
for param_name, weight_name in expert_params_mapping:
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
|
|||||||
@ -18,6 +18,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.deepseek_v2 import (
|
from vllm.model_executor.models.deepseek_v2 import (
|
||||||
DeepseekV2DecoderLayer,
|
DeepseekV2DecoderLayer,
|
||||||
@ -155,7 +156,8 @@ class DeepseekV2Model(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
weight_loader = param.weight_loader
|
weight_loader = param.weight_loader
|
||||||
|
|||||||
@ -22,6 +22,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
@ -359,7 +360,10 @@ class DeepSeekMTP(nn.Module, SupportsPP, DeepseekV2MixtureOfExperts):
|
|||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = chunk_name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
chunk_name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
param = params_dict[name_mapped]
|
param = params_dict[name_mapped]
|
||||||
# We should ask the weight loader to return success or
|
# We should ask the weight loader to return success or
|
||||||
|
|||||||
@ -72,6 +72,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
@ -1643,7 +1644,10 @@ class DeepseekV2ForCausalLM(
|
|||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = chunk_name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
chunk_name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -59,6 +59,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -464,7 +465,8 @@ class Dots1Model(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -60,6 +60,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.transformers_utils.config import set_default_rope_theta
|
from vllm.transformers_utils.config import set_default_rope_theta
|
||||||
@ -563,7 +564,11 @@ class Ernie4_5_MoeModel(nn.Module):
|
|||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -56,6 +56,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.transformers_utils.config import set_default_rope_theta
|
from vllm.transformers_utils.config import set_default_rope_theta
|
||||||
@ -736,7 +737,8 @@ class Ernie4_5_VLMoeForCausalLM(nn.Module, SupportsPP):
|
|||||||
moe_offset = int(name.split(".")[-3])
|
moe_offset = int(name.split(".")[-3])
|
||||||
is_text_expert = moe_offset <= self.config.moe_num_experts[0] - 1
|
is_text_expert = moe_offset <= self.config.moe_num_experts[0] - 1
|
||||||
|
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
if is_text_expert:
|
if is_text_expert:
|
||||||
name = name.replace(".experts.", ".text_experts.")
|
name = name.replace(".experts.", ".text_experts.")
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -59,6 +59,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -554,7 +555,10 @@ class Glm4MoeModel(nn.Module):
|
|||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -38,7 +38,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
from .glm4_moe import (
|
from .glm4_moe import (
|
||||||
@ -293,7 +296,8 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
weight_loader = param.weight_loader
|
weight_loader = param.weight_loader
|
||||||
|
|||||||
@ -56,6 +56,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
@ -401,7 +402,8 @@ class GraniteMoeModel(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -28,7 +28,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
from .granitemoe import GraniteMoeMoE
|
from .granitemoe import GraniteMoeMoE
|
||||||
@ -465,7 +468,8 @@ class GraniteMoeHybridModel(nn.Module):
|
|||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
|
|||||||
@ -52,6 +52,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -426,7 +427,8 @@ class Grok1Model(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -63,6 +63,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -848,7 +849,11 @@ class HunYuanModel(nn.Module):
|
|||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
continue
|
continue
|
||||||
param = params_dict[name_mapped]
|
param = params_dict[name_mapped]
|
||||||
|
|||||||
@ -33,7 +33,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.model_executor.models.llama import LlamaMLP as JambaMLP
|
from vllm.model_executor.models.llama import LlamaMLP as JambaMLP
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -427,7 +430,8 @@ class JambaModel(nn.Module):
|
|||||||
|
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
weight_loader = param.weight_loader
|
weight_loader = param.weight_loader
|
||||||
weight_loader(
|
weight_loader(
|
||||||
|
|||||||
@ -38,6 +38,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.transformers_utils.configs.kimi_linear import KimiLinearConfig
|
from vllm.transformers_utils.configs.kimi_linear import KimiLinearConfig
|
||||||
@ -609,7 +610,8 @@ class KimiLinearForCausalLM(
|
|||||||
):
|
):
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
|
|||||||
@ -65,6 +65,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.deepseek_v2 import DeepseekV2Model
|
from vllm.model_executor.models.deepseek_v2 import DeepseekV2Model
|
||||||
from vllm.model_executor.models.interfaces import SupportsMultiModal, SupportsPP
|
from vllm.model_executor.models.interfaces import SupportsMultiModal, SupportsPP
|
||||||
@ -528,7 +529,8 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
|
|||||||
) in enumerate(expert_params_mapping):
|
) in enumerate(expert_params_mapping):
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -35,7 +35,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.transformers_utils.configs import Lfm2MoeConfig
|
from vllm.transformers_utils.configs import Lfm2MoeConfig
|
||||||
|
|
||||||
@ -536,7 +539,8 @@ class Lfm2MoeModel(nn.Module):
|
|||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -46,6 +46,7 @@ from vllm.model_executor.layers.rotary_embedding import get_rope
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.interfaces import MixtureOfExperts
|
from vllm.model_executor.models.interfaces import MixtureOfExperts
|
||||||
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
||||||
@ -465,7 +466,7 @@ class Llama4Model(LlamaModel):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Replace the weight name with the parameter name.
|
# Replace the weight name with the parameter name.
|
||||||
full_param_name = name.replace(weight_name, param_name)
|
full_param_name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
# Skip if the current weight corresponds to a parameter that
|
# Skip if the current weight corresponds to a parameter that
|
||||||
# does not exist on the current PP (pipeline parallel) rank.
|
# does not exist on the current PP (pipeline parallel) rank.
|
||||||
|
|||||||
@ -60,7 +60,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.model_executor.models.deepseek_v2 import DeepseekV2MLAAttention
|
from vllm.model_executor.models.deepseek_v2 import DeepseekV2MLAAttention
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -676,7 +679,10 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
|
|||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
is_expert_weight = True
|
is_expert_weight = True
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
# Skip mtp
|
# Skip mtp
|
||||||
if ".mtp." in name_mapped:
|
if ".mtp." in name_mapped:
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -40,6 +40,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
@ -555,7 +556,8 @@ class MiMoV2Model(nn.Module):
|
|||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
name_rewritten = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_rewritten = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name_rewritten, self):
|
if is_pp_missing_parameter(name_rewritten, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -58,7 +58,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.model_executor.utils import set_weight_attrs
|
from vllm.model_executor.utils import set_weight_attrs
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
@ -525,7 +528,8 @@ class MiniCPMModel(nn.Module):
|
|||||||
for param_name, weight_name, expert_id in expert_params_mapping:
|
for param_name, weight_name, expert_id in expert_params_mapping:
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
|
|||||||
@ -40,7 +40,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
from .interfaces import SupportsEagle, SupportsLoRA, SupportsPP
|
from .interfaces import SupportsEagle, SupportsLoRA, SupportsPP
|
||||||
@ -262,7 +265,8 @@ class EagleMiniCPMModel(nn.Module):
|
|||||||
for param_name, weight_name, expert_id in expert_params_mapping:
|
for param_name, weight_name, expert_id in expert_params_mapping:
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
|
|||||||
@ -56,6 +56,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -448,7 +449,8 @@ class MiniMaxM2Model(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -45,7 +45,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.model_executor.models.utils import maybe_prefix
|
from vllm.model_executor.models.utils import maybe_prefix
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -806,7 +809,8 @@ class MiniMaxText01ForCausalLM(nn.Module, HasInnerState, IsHybrid):
|
|||||||
continue
|
continue
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
return
|
return
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
|
|||||||
@ -57,6 +57,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -428,7 +429,10 @@ class MixtralModel(nn.Module):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
is_expert_weight = True
|
is_expert_weight = True
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
|
|||||||
@ -56,6 +56,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.interfaces import (
|
from vllm.model_executor.models.interfaces import (
|
||||||
HasInnerState,
|
HasInnerState,
|
||||||
@ -696,7 +697,10 @@ class NemotronHModel(nn.Module):
|
|||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -46,7 +46,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
from .interfaces import SupportsLoRA, SupportsPP
|
from .interfaces import SupportsLoRA, SupportsPP
|
||||||
@ -383,7 +386,8 @@ class OlmoeModel(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -61,6 +61,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.interfaces import (
|
from vllm.model_executor.models.interfaces import (
|
||||||
MixtureOfExperts,
|
MixtureOfExperts,
|
||||||
@ -820,7 +821,10 @@ class OpenPanguModel(nn.Module):
|
|||||||
if origin_name not in weight_name:
|
if origin_name not in weight_name:
|
||||||
continue
|
continue
|
||||||
flag_dict["is_expert_weight"] = True
|
flag_dict["is_expert_weight"] = True
|
||||||
weight_name_mapped = weight_name.replace(origin_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
weight_name_mapped = remap_expert_weight_name(
|
||||||
|
weight_name, origin_name, param_name
|
||||||
|
)
|
||||||
if is_pp_missing_parameter(weight_name_mapped, self):
|
if is_pp_missing_parameter(weight_name_mapped, self):
|
||||||
continue
|
continue
|
||||||
param = params_dict[weight_name_mapped]
|
param = params_dict[weight_name_mapped]
|
||||||
|
|||||||
@ -34,7 +34,10 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
|
|||||||
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.model_executor.models.deepseek_mtp import (
|
from vllm.model_executor.models.deepseek_mtp import (
|
||||||
DeepSeekMultiTokenPredictor,
|
DeepSeekMultiTokenPredictor,
|
||||||
DeepSeekMultiTokenPredictorLayer,
|
DeepSeekMultiTokenPredictorLayer,
|
||||||
@ -201,7 +204,8 @@ class OpenPanguMTP(nn.Module, SupportsPP):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
param = params_dict[name]
|
param = params_dict[name]
|
||||||
weight_loader = param.weight_loader
|
weight_loader = param.weight_loader
|
||||||
|
|||||||
@ -51,6 +51,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
@ -565,7 +566,8 @@ class PhiMoEModel(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -55,7 +55,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
from .interfaces import SupportsLoRA, SupportsPP
|
from .interfaces import SupportsLoRA, SupportsPP
|
||||||
@ -470,7 +473,8 @@ class Qwen2MoeModel(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
|
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
|
|||||||
@ -61,6 +61,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
from vllm.model_executor.models.utils import sequence_parallel_chunk
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
@ -567,7 +568,10 @@ class Qwen3MoeModel(nn.Module):
|
|||||||
|
|
||||||
# Do not modify `name` since the loop may continue here
|
# Do not modify `name` since the loop may continue here
|
||||||
# Instead, create a new variable
|
# Instead, create a new variable
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
|
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -65,6 +65,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
sharded_weight_loader,
|
sharded_weight_loader,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP
|
from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP
|
||||||
@ -1083,7 +1084,8 @@ class Qwen3NextModel(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -18,7 +18,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.model_executor.models.qwen3_next import (
|
from vllm.model_executor.models.qwen3_next import (
|
||||||
Qwen3NextDecoderLayer,
|
Qwen3NextDecoderLayer,
|
||||||
Qwen3NextRMSNorm,
|
Qwen3NextRMSNorm,
|
||||||
@ -184,7 +187,8 @@ class Qwen3NextMultiTokenPredictor(nn.Module):
|
|||||||
param_name, weight_name, expert_id, shard_id = mapping
|
param_name, weight_name, expert_id, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -42,6 +42,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
|
|||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
default_weight_loader,
|
default_weight_loader,
|
||||||
maybe_remap_kv_scale_name,
|
maybe_remap_kv_scale_name,
|
||||||
|
remap_expert_weight_name,
|
||||||
)
|
)
|
||||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
@ -238,7 +239,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
|
|||||||
# Anyway, this is an expert weight and should not be
|
# Anyway, this is an expert weight and should not be
|
||||||
# attempted to load as other weights later
|
# attempted to load as other weights later
|
||||||
is_expert_weight = True
|
is_expert_weight = True
|
||||||
name_mapped = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name_mapped = remap_expert_weight_name(
|
||||||
|
name, weight_name, param_name
|
||||||
|
)
|
||||||
if is_pp_missing_parameter(name_mapped, self):
|
if is_pp_missing_parameter(name_mapped, self):
|
||||||
continue
|
continue
|
||||||
if is_fused_expert:
|
if is_fused_expert:
|
||||||
|
|||||||
@ -34,7 +34,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|||||||
ParallelLMHead,
|
ParallelLMHead,
|
||||||
VocabParallelEmbedding,
|
VocabParallelEmbedding,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
|
default_weight_loader,
|
||||||
|
remap_expert_weight_name,
|
||||||
|
)
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.transformers_utils.configs.step3_vl import Step3TextConfig
|
from vllm.transformers_utils.configs.step3_vl import Step3TextConfig
|
||||||
|
|
||||||
@ -498,7 +501,8 @@ class Step3TextForCausalLM(nn.Module, SupportsPP):
|
|||||||
param_name, weight_name, shard_id = mapping
|
param_name, weight_name, shard_id = mapping
|
||||||
if weight_name not in name:
|
if weight_name not in name:
|
||||||
continue
|
continue
|
||||||
name = name.replace(weight_name, param_name)
|
# Remap expert weight name (handles base_layer suffix correctly)
|
||||||
|
name = remap_expert_weight_name(name, weight_name, param_name)
|
||||||
# Skip layers on other devices.
|
# Skip layers on other devices.
|
||||||
if is_pp_missing_parameter(name, self):
|
if is_pp_missing_parameter(name, self):
|
||||||
continue
|
continue
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user