Correct PPMissingLayer handling in Deepseek-V2-Lite PP deployment (#20665)

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
Seiji Eicher 2025-07-09 20:34:40 -07:00 committed by GitHub
parent 49e8c7ea25
commit ad6c2e1a0b


@@ -739,14 +739,20 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
         self.num_expert_groups = config.n_group
 
         self.moe_layers: list[FusedMoE] = []
+        example_moe = None
         for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
             assert isinstance(layer, DeepseekV2DecoderLayer)
             if isinstance(layer.mlp, DeepseekV2MoE):
+                # Pick last one layer since the first ones may be dense layers.
+                example_moe = layer.mlp
                 self.moe_layers.append(layer.mlp.experts)
 
-        # Pick last one layer since the first ones may be dense layers.
-        example_moe = typing.cast(
-            DeepseekV2MoE, self.model.layers[config.num_hidden_layers - 1].mlp)
+        if example_moe is None:
+            raise RuntimeError("No DeepseekV2MoE layer found in model.layers.")
+
         self.num_logical_experts = example_moe.n_logical_experts
         self.num_physical_experts = example_moe.n_physical_experts
         self.num_local_physical_experts = example_moe.n_local_physical_experts
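
For context, here is a minimal runnable sketch of why the old indexing breaks under pipeline parallelism. The DecoderLayer and MoEMLP classes below are hypothetical stand-ins (only the PPMissingLayer name mirrors vLLM); this is an illustration, not the vLLM implementation. On a rank that does not host the final decoder layer, that slot holds a placeholder with no `mlp` attribute, so `layers[config.num_hidden_layers - 1].mlp` raises AttributeError, while the loop-based selection above only inspects layers that are local to the rank.

# Minimal sketch with hypothetical classes; only PPMissingLayer mirrors a
# real vLLM name. Not the vLLM implementation.
import typing

import torch.nn as nn


class PPMissingLayer(nn.Module):
    """Placeholder a PP rank keeps for layers hosted on another rank."""


class MoEMLP(nn.Module):
    n_logical_experts = 64  # illustrative value only


class DecoderLayer(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.mlp = MoEMLP()


num_hidden_layers = 4
# A non-final PP rank: the trailing layers are placeholders, not real modules.
layers = nn.ModuleList(
    [DecoderLayer(), DecoderLayer(), PPMissingLayer(), PPMissingLayer()])

# Old approach: assume the last layer is local and has an MoE mlp.
try:
    example_moe = typing.cast(MoEMLP, layers[num_hidden_layers - 1].mlp)
except AttributeError as exc:
    print(f"old indexing fails on this rank: {exc}")

# New approach: take the last *local* MoE layer, skipping placeholders.
example_moe = None
for layer in layers:
    if isinstance(layer, PPMissingLayer):
        continue
    if isinstance(layer.mlp, MoEMLP):
        example_moe = layer.mlp
if example_moe is None:
    raise RuntimeError("No MoE layer found on this pipeline rank.")
print(example_moe.n_logical_experts)  # -> 64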