From dc45efc8ef7fc1e2571331eaf4671e1652e2a865 Mon Sep 17 00:00:00 2001
From: Dezhan
Date: Thu, 20 Nov 2025 02:52:36 -0800
Subject: [PATCH] [BugFix] Fix Llama4 Pipeline Parallelism Assert Error (#28577)

Co-authored-by: Dezhan Tu
---
 vllm/model_executor/models/llama4.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vllm/model_executor/models/llama4.py b/vllm/model_executor/models/llama4.py
index 4c6d1d4244755..e1bdfc3405f70 100644
--- a/vllm/model_executor/models/llama4.py
+++ b/vllm/model_executor/models/llama4.py
@@ -53,6 +53,7 @@ from vllm.model_executor.models.utils import sequence_parallel_chunk
 from .llama import LlamaForCausalLM, LlamaMLP, LlamaModel
 from .utils import (
     AutoWeightsLoader,
+    PPMissingLayer,
     extract_layer_index,
     fast_topk,
     is_pp_missing_parameter,
@@ -729,6 +730,9 @@ class Llama4ForCausalLM(LlamaForCausalLM, MixtureOfExperts):
         self.moe_layers = []
         example_moe = None
         for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
             assert isinstance(layer, Llama4DecoderLayer)
             if isinstance(layer.feed_forward, Llama4MoE):
                 # Pick last one layer since the first ones may be dense layers.
@@ -765,6 +769,9 @@ class Llama4ForCausalLM(LlamaForCausalLM, MixtureOfExperts):
         self.num_local_physical_experts = num_local_physical_experts
         self.num_redundant_experts = num_physical_experts - self.num_logical_experts
         for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
             if isinstance(layer.feed_forward, Llama4MoE):
                 moe = layer.feed_forward
                 moe.n_local_physical_experts = num_local_physical_experts
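
For context, below is a minimal, self-contained sketch of the pattern this patch applies: under pipeline parallelism, decoder layers assigned to other ranks appear in model.layers as PPMissingLayer placeholders, so any loop that type-checks the layers must skip them before asserting. The PPMissingLayer and Llama4DecoderLayer classes here are simplified stand-ins (not vLLM's real implementations), and collect_moe_layers is a hypothetical helper mirroring the loop in the patch.

class PPMissingLayer:
    """Stand-in for a decoder layer hosted on another pipeline-parallel rank."""


class Llama4DecoderLayer:
    """Stand-in for a real decoder layer; is_moe marks a mixture-of-experts FFN."""

    def __init__(self, is_moe: bool) -> None:
        self.is_moe = is_moe


def collect_moe_layers(layers):
    """Collect MoE layers while skipping placeholders owned by other PP ranks."""
    moe_layers = []
    for layer in layers:
        # Without this check, ranks that do not host every layer would hit
        # the isinstance assert below and crash (the bug the patch fixes).
        if isinstance(layer, PPMissingLayer):
            continue

        assert isinstance(layer, Llama4DecoderLayer)
        if layer.is_moe:
            moe_layers.append(layer)
    return moe_layers


if __name__ == "__main__":
    # A rank that hosts only the last two layers; earlier layers are placeholders.
    layers = [
        PPMissingLayer(),
        PPMissingLayer(),
        Llama4DecoderLayer(is_moe=False),
        Llama4DecoderLayer(is_moe=True),
    ]
    print(len(collect_moe_layers(layers)))  # prints 1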