Merge b1139f3484269217d41c0bc6ca794526f81a8f5b into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟 2025-12-25 08:06:39 +08:00, committed by GitHub
commit fdab8ce4b0
41 changed files with 230 additions and 55 deletions

View File

@ -1366,6 +1366,10 @@ class FusedMoE(CustomOp):
def load_weights(
self, weights: Iterable[tuple[str, torch.Tensor]]
) -> Iterable[str]:
from vllm.model_executor.model_loader.weight_utils import (
remap_expert_weight_name,
)
if (expert_mapping := self.expert_mapping) is None:
raise ValueError(
"`self.expert_mapping` must be provided to "
@ -1376,7 +1380,10 @@ class FusedMoE(CustomOp):
for param_name, weight_name, expert_id, shard_id in expert_mapping:
if weight_name not in qual_name:
continue
weight_name = qual_name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
weight_name = remap_expert_weight_name(
qual_name, weight_name, param_name
)
param_name = weight_name.removeprefix(f"{self.layer_name}.")
param = getattr(self, param_name)
success = self.weight_loader(
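For context on how the FusedMoE hunk above applies the helper, a rough sketch of one loop iteration follows (the mapping entry and checkpoint name below are hypothetical; real entries come from the model's expert parameter mapping):

from vllm.model_executor.model_loader.weight_utils import remap_expert_weight_name

# Hypothetical expert_mapping entry and checkpoint weight name, for illustration only.
param_name, weight_name, expert_id, shard_id = (
    "experts.w13_", "experts.0.gate_proj.", 0, "w1"
)
qual_name = "model.layers.3.mlp.experts.0.gate_proj.base_layer.weight"

if weight_name in qual_name:
    # The LoRA base_layer component stays attached to the experts module
    # instead of being glued onto the shard name.
    mapped = remap_expert_weight_name(qual_name, weight_name, param_name)
    # mapped == "model.layers.3.mlp.experts.base_layer.w13_weight"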

View File

@ -1178,3 +1178,38 @@ def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None:
# If there were no matches, return the untouched param name
return name
def remap_expert_weight_name(
    name: str,
    weight_name: str,
    param_name: str,
) -> str:
    """Remap expert weight names, handling base_layer prefix for LoRA.

    When loading expert weights, this function maps from checkpoint weight
    names to model parameter names. It handles the special case where
    LoRA wraps the original layer with a `base_layer` prefix.

    For example:
    - Input: name="model.layers.0.mlp.experts.0.up_proj.base_layer.weight"
             weight_name="experts.0.up_proj."
             param_name="experts.w13_"
    - Output: "model.layers.0.mlp.experts.base_layer.w13_weight"

    Args:
        name: The full checkpoint weight name.
        weight_name: The weight name pattern to match (e.g., "experts.0.up_proj.").
        param_name: The parameter name to substitute (e.g., "experts.w13_").

    Returns:
        The remapped weight name with proper base_layer handling.
    """
    prefix, _, suffix = name.partition(weight_name)
    middle = param_name
    base = "base_layer"
    if suffix.startswith(f"{base}."):
        param_list = param_name.split(".", 1)
        param_list.insert(1, base)
        middle = ".".join(param_list)
    return prefix + middle + suffix.removeprefix(f"{base}.")
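To make the remapping concrete, a small usage sketch follows (not part of the commit; it reuses the names from the docstring example and adds a non-LoRA case for contrast):

from vllm.model_executor.model_loader.weight_utils import remap_expert_weight_name

# LoRA checkpoint: PEFT keeps the original weight under a `base_layer` wrapper.
assert remap_expert_weight_name(
    "model.layers.0.mlp.experts.0.up_proj.base_layer.weight",
    "experts.0.up_proj.",
    "experts.w13_",
) == "model.layers.0.mlp.experts.base_layer.w13_weight"
# A plain str.replace() would have produced
# "model.layers.0.mlp.experts.w13_base_layer.weight", leaving base_layer
# fused into the shard name.

# Non-LoRA checkpoint: the helper degenerates to the old str.replace() behavior.
assert remap_expert_weight_name(
    "model.layers.0.mlp.experts.0.up_proj.weight",
    "experts.0.up_proj.",
    "experts.w13_",
) == "model.layers.0.mlp.experts.w13_weight"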

View File

@ -36,6 +36,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
from vllm.model_executor.models.llama import LlamaMLP as AfmoeMLP
@ -533,7 +534,10 @@ class AfmoeModel(nn.Module):
# Do not modify `name` since the loop may continue here
# Instead, create a new variable
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
if is_pp_missing_parameter(name_mapped, self):
continue

View File

@ -38,7 +38,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.model_executor.utils import set_weight_attrs
from vllm.platforms import current_platform
from vllm.sequence import IntermediateTensors
@ -609,7 +612,8 @@ class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant):
for param_name, weight_name, shard_id in expert_params_mapping:
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue
param = params_dict[name]

View File

@ -55,7 +55,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from .interfaces import SupportsLoRA, SupportsPP
@ -524,7 +527,8 @@ class BailingMoeModel(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue

View File

@ -31,6 +31,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
@ -411,7 +412,8 @@ class DbrxModel(nn.Module):
for param_name, weight_name in expert_params_mapping:
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue
param = params_dict[name]

View File

@ -18,6 +18,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.deepseek_v2 import (
DeepseekV2DecoderLayer,
@ -155,7 +156,8 @@ class DeepseekV2Model(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
param = params_dict[name]
weight_loader = param.weight_loader

View File

@ -22,6 +22,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.platforms import current_platform
from vllm.sequence import IntermediateTensors
@ -359,7 +360,10 @@ class DeepSeekMTP(nn.Module, SupportsPP, DeepseekV2MixtureOfExperts):
# Do not modify `name` since the loop may continue here
# Instead, create a new variable
name_mapped = chunk_name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
chunk_name, weight_name, param_name
)
param = params_dict[name_mapped]
# We should ask the weight loader to return success or

View File

@ -72,6 +72,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.utils import sequence_parallel_chunk
from vllm.platforms import current_platform
@ -1643,7 +1644,10 @@ class DeepseekV2ForCausalLM(
# Do not modify `name` since the loop may continue here
# Instead, create a new variable
name_mapped = chunk_name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
chunk_name, weight_name, param_name
)
if is_pp_missing_parameter(name_mapped, self):
continue

View File

@ -59,6 +59,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
@ -464,7 +465,8 @@ class Dots1Model(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue

View File

@ -60,6 +60,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.config import set_default_rope_theta
@ -563,7 +564,11 @@ class Ernie4_5_MoeModel(nn.Module):
# Do not modify `name` since the loop may continue here
# Instead, create a new variable
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
# Skip layers on other devices.
if is_pp_missing_parameter(name_mapped, self):
continue

View File

@ -56,6 +56,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.config import set_default_rope_theta
@ -736,7 +737,8 @@ class Ernie4_5_VLMoeForCausalLM(nn.Module, SupportsPP):
moe_offset = int(name.split(".")[-3])
is_text_expert = moe_offset <= self.config.moe_num_experts[0] - 1
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_text_expert:
name = name.replace(".experts.", ".text_experts.")
else:

View File

@ -59,6 +59,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
@ -554,7 +555,10 @@ class Glm4MoeModel(nn.Module):
# Do not modify `name` since the loop may continue here
# Instead, create a new variable
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
if is_pp_missing_parameter(name_mapped, self):
continue

View File

@ -38,7 +38,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from .glm4_moe import (
@ -293,7 +296,8 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
param = params_dict[name]
weight_loader = param.weight_loader

View File

@ -56,6 +56,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.utils import sequence_parallel_chunk
from vllm.sequence import IntermediateTensors
@ -401,7 +402,8 @@ class GraniteMoeModel(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue

View File

@ -28,7 +28,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from .granitemoe import GraniteMoeMoE
@ -465,7 +468,8 @@ class GraniteMoeHybridModel(nn.Module):
if weight_name not in name:
continue
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name_mapped, self):

View File

@ -52,6 +52,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
@ -426,7 +427,8 @@ class Grok1Model(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue

View File

@ -63,6 +63,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
@ -848,7 +849,11 @@ class HunYuanModel(nn.Module):
# Do not modify `name` since the loop may continue here
# Instead, create a new variable
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
if is_pp_missing_parameter(name_mapped, self):
continue
param = params_dict[name_mapped]

View File

@ -33,7 +33,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.model_executor.models.llama import LlamaMLP as JambaMLP
from vllm.sequence import IntermediateTensors
@ -427,7 +430,8 @@ class JambaModel(nn.Module):
if is_pp_missing_parameter(name, self):
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
param = params_dict[name]
weight_loader = param.weight_loader
weight_loader(

View File

@ -38,6 +38,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.configs.kimi_linear import KimiLinearConfig
@ -609,7 +610,8 @@ class KimiLinearForCausalLM(
):
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue
param = params_dict[name]

View File

@ -65,6 +65,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.deepseek_v2 import DeepseekV2Model
from vllm.model_executor.models.interfaces import SupportsMultiModal, SupportsPP
@ -528,7 +529,8 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
) in enumerate(expert_params_mapping):
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue

View File

@ -35,7 +35,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.configs import Lfm2MoeConfig
@ -536,7 +539,8 @@ class Lfm2MoeModel(nn.Module):
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue

View File

@ -46,6 +46,7 @@ from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.interfaces import MixtureOfExperts
from vllm.model_executor.models.utils import sequence_parallel_chunk
@ -465,7 +466,7 @@ class Llama4Model(LlamaModel):
continue
# Replace the weight name with the parameter name.
full_param_name = name.replace(weight_name, param_name)
full_param_name = remap_expert_weight_name(name, weight_name, param_name)
# Skip if the current weight corresponds to a parameter that
# does not exist on the current PP (pipeline parallel) rank.

View File

@ -60,7 +60,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.model_executor.models.deepseek_v2 import DeepseekV2MLAAttention
from vllm.sequence import IntermediateTensors
@ -676,7 +679,10 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
if weight_name not in name:
continue
is_expert_weight = True
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
# Skip mtp
if ".mtp." in name_mapped:
continue

View File

@ -40,6 +40,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.utils import sequence_parallel_chunk
from vllm.sequence import IntermediateTensors
@ -555,7 +556,8 @@ class MiMoV2Model(nn.Module):
if weight_name not in name:
continue
name_rewritten = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_rewritten = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name_rewritten, self):
continue

View File

@ -58,7 +58,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.model_executor.utils import set_weight_attrs
from vllm.platforms import current_platform
from vllm.sequence import IntermediateTensors
@ -525,7 +528,8 @@ class MiniCPMModel(nn.Module):
for param_name, weight_name, expert_id in expert_params_mapping:
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue
param = params_dict[name]

View File

@ -40,7 +40,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from .interfaces import SupportsEagle, SupportsLoRA, SupportsPP
@ -262,7 +265,8 @@ class EagleMiniCPMModel(nn.Module):
for param_name, weight_name, expert_id in expert_params_mapping:
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue
param = params_dict[name]

View File

@ -56,6 +56,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
@ -448,7 +449,8 @@ class MiniMaxM2Model(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
continue

View File

@ -45,7 +45,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.model_executor.models.utils import maybe_prefix
from vllm.sequence import IntermediateTensors
@ -806,7 +809,8 @@ class MiniMaxText01ForCausalLM(nn.Module, HasInnerState, IsHybrid):
continue
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
if is_pp_missing_parameter(name, self):
return
param = params_dict[name]

View File

@ -57,6 +57,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
@ -428,7 +429,10 @@ class MixtralModel(nn.Module):
continue
is_expert_weight = True
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
# Skip layers on other devices.
if is_pp_missing_parameter(name_mapped, self):

View File

@ -56,6 +56,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.interfaces import (
HasInnerState,
@ -696,7 +697,10 @@ class NemotronHModel(nn.Module):
# Do not modify `name` since the loop may continue here
# Instead, create a new variable
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
if is_pp_missing_parameter(name_mapped, self):
continue

View File

@ -46,7 +46,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from .interfaces import SupportsLoRA, SupportsPP
@ -383,7 +386,8 @@ class OlmoeModel(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue

View File

@ -61,6 +61,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.interfaces import (
MixtureOfExperts,
@ -820,7 +821,10 @@ class OpenPanguModel(nn.Module):
if origin_name not in weight_name:
continue
flag_dict["is_expert_weight"] = True
weight_name_mapped = weight_name.replace(origin_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
weight_name_mapped = remap_expert_weight_name(
weight_name, origin_name, param_name
)
if is_pp_missing_parameter(weight_name_mapped, self):
continue
param = params_dict[weight_name_mapped]

View File

@ -34,7 +34,10 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.vocab_parallel_embedding import (
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.model_executor.models.deepseek_mtp import (
DeepSeekMultiTokenPredictor,
DeepSeekMultiTokenPredictorLayer,
@ -201,7 +204,8 @@ class OpenPanguMTP(nn.Module, SupportsPP):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
param = params_dict[name]
weight_loader = param.weight_loader

View File

@ -51,6 +51,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
@ -565,7 +566,8 @@ class PhiMoEModel(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue

View File

@ -55,7 +55,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from .interfaces import SupportsLoRA, SupportsPP
@ -470,7 +473,8 @@ class Qwen2MoeModel(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):

View File

@ -61,6 +61,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.model_executor.models.utils import sequence_parallel_chunk
from vllm.sequence import IntermediateTensors
@ -567,7 +568,10 @@ class Qwen3MoeModel(nn.Module):
# Do not modify `name` since the loop may continue here
# Instead, create a new variable
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
if is_pp_missing_parameter(name_mapped, self):
continue

View File

@ -65,6 +65,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
)
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
sharded_weight_loader,
)
from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP
@ -1083,7 +1084,8 @@ class Qwen3NextModel(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue

View File

@ -18,7 +18,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.model_executor.models.qwen3_next import (
Qwen3NextDecoderLayer,
Qwen3NextRMSNorm,
@ -184,7 +187,8 @@ class Qwen3NextMultiTokenPredictor(nn.Module):
param_name, weight_name, expert_id, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue

View File

@ -42,6 +42,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
maybe_remap_kv_scale_name,
remap_expert_weight_name,
)
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.sequence import IntermediateTensors
@ -238,7 +239,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
# Anyway, this is an expert weight and should not be
# attempted to load as other weights later
is_expert_weight = True
name_mapped = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name_mapped = remap_expert_weight_name(
name, weight_name, param_name
)
if is_pp_missing_parameter(name_mapped, self):
continue
if is_fused_expert:

View File

@ -34,7 +34,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader,
remap_expert_weight_name,
)
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.configs.step3_vl import Step3TextConfig
@ -498,7 +501,8 @@ class Step3TextForCausalLM(nn.Module, SupportsPP):
param_name, weight_name, shard_id = mapping
if weight_name not in name:
continue
name = name.replace(weight_name, param_name)
# Remap expert weight name (handles base_layer suffix correctly)
name = remap_expert_weight_name(name, weight_name, param_name)
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue