mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-21 07:47:00 +08:00
[BugFix] Fix Llama4 Pipeline Parallelism Assert Error (#28577)
Co-authored-by: Dezhan Tu <dztu@meta.com>
This commit is contained in:
parent
fb8851f254
commit
dc45efc8ef
@@ -53,6 +53,7 @@ from vllm.model_executor.models.utils import sequence_parallel_chunk
|
|||||||
from .llama import LlamaForCausalLM, LlamaMLP, LlamaModel
|
from .llama import LlamaForCausalLM, LlamaMLP, LlamaModel
|
||||||
from .utils import (
|
from .utils import (
|
||||||
AutoWeightsLoader,
|
AutoWeightsLoader,
|
||||||
|
PPMissingLayer,
|
||||||
extract_layer_index,
|
extract_layer_index,
|
||||||
fast_topk,
|
fast_topk,
|
||||||
is_pp_missing_parameter,
|
is_pp_missing_parameter,
|
||||||
@@ -729,6 +730,9 @@ class Llama4ForCausalLM(LlamaForCausalLM, MixtureOfExperts):
|
|||||||
self.moe_layers = []
|
self.moe_layers = []
|
||||||
example_moe = None
|
example_moe = None
|
||||||
for layer in self.model.layers:
|
for layer in self.model.layers:
|
||||||
|
if isinstance(layer, PPMissingLayer):
|
||||||
|
continue
|
||||||
|
|
||||||
assert isinstance(layer, Llama4DecoderLayer)
|
assert isinstance(layer, Llama4DecoderLayer)
|
||||||
if isinstance(layer.feed_forward, Llama4MoE):
|
if isinstance(layer.feed_forward, Llama4MoE):
|
||||||
# Pick last one layer since the first ones may be dense layers.
|
# Pick last one layer since the first ones may be dense layers.
|
||||||
@@ -765,6 +769,9 @@ class Llama4ForCausalLM(LlamaForCausalLM, MixtureOfExperts):
|
|||||||
self.num_local_physical_experts = num_local_physical_experts
|
self.num_local_physical_experts = num_local_physical_experts
|
||||||
self.num_redundant_experts = num_physical_experts - self.num_logical_experts
|
self.num_redundant_experts = num_physical_experts - self.num_logical_experts
|
||||||
for layer in self.model.layers:
|
for layer in self.model.layers:
|
||||||
|
if isinstance(layer, PPMissingLayer):
|
||||||
|
continue
|
||||||
|
|
||||||
if isinstance(layer.feed_forward, Llama4MoE):
|
if isinstance(layer.feed_forward, Llama4MoE):
|
||||||
moe = layer.feed_forward
|
moe = layer.feed_forward
|
||||||
moe.n_local_physical_experts = num_local_physical_experts
|
moe.n_local_physical_experts = num_local_physical_experts
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user