[Bugfix] Fix incorrect use of hidden_states for shared_experts due to do_naive_dispatch_combine (#28740)
Signed-off-by: Alexander Matveev <amatveev@redhat.com>
commit e5c78956c0 (parent 2e0ad629b0)
@@ -1749,14 +1749,16 @@ class FusedMoE(CustomOp):
         with sp_ctx:
             if do_naive_dispatch_combine:
-                hidden_states, router_logits = get_ep_group().dispatch(
+                hidden_states_combined, router_logits = get_ep_group().dispatch(
                     hidden_states, router_logits, self.is_sequence_parallel
                 )
 
             # Matrix multiply.
             final_hidden_states = self.quant_method.apply(
                 layer=self,
-                x=hidden_states,
+                x=hidden_states_combined
+                if do_naive_dispatch_combine
+                else hidden_states,
                 router_logits=router_logits,
                 top_k=self.top_k,
                 renormalize=self.renormalize,
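The diff above fixes an aliasing bug: when do_naive_dispatch_combine was enabled, the dispatch result was bound back to the name hidden_states, so any later consumer of the original activations (the shared_experts branch named in the commit title) saw the dispatched tensor instead. Below is a minimal, self-contained sketch of that pattern; dispatch, routed_experts, and shared_experts are hypothetical stand-ins for get_ep_group().dispatch(), quant_method.apply(), and the shared-expert branch, not vLLM's actual forward path.

import torch

def dispatch(h: torch.Tensor, logits: torch.Tensor):
    # EP dispatch may pad/reorder tokens across ranks, so its output is
    # not a drop-in replacement for the original activations. (Here we
    # just duplicate rows to make the shape change visible.)
    return torch.cat([h, h], dim=0), torch.cat([logits, logits], dim=0)

def routed_experts(h: torch.Tensor, logits: torch.Tensor) -> torch.Tensor:
    return h * 2.0  # placeholder for the fused routed-expert kernel

def shared_experts(h: torch.Tensor) -> torch.Tensor:
    return h + 1.0  # placeholder for the shared-expert MLP

def forward(hidden_states, router_logits, do_naive_dispatch_combine=True):
    if do_naive_dispatch_combine:
        # The fix: bind the dispatch result to a new name so the
        # original hidden_states stays intact for shared_experts.
        hidden_states_combined, router_logits = dispatch(
            hidden_states, router_logits
        )
    routed_out = routed_experts(
        hidden_states_combined if do_naive_dispatch_combine else hidden_states,
        router_logits,
    )
    # Before the fix, this call received the dispatched (reordered,
    # padded) tensor because hidden_states had been overwritten.
    shared_out = shared_experts(hidden_states)
    return routed_out, shared_out

h = torch.randn(4, 8)
logits = torch.randn(4, 2)
routed_out, shared_out = forward(h, logits)
assert shared_out.shape == h.shape  # shared experts see the original tokens

Renaming the dispatch output rather than restructuring the control flow keeps the change minimal: the routed-expert path selects between the two tensors explicitly, and every other reader of hidden_states is unaffected.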