From 72c5dd0310d34958ac405032f95e77245679f61a Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Thu, 2 Oct 2025 17:29:49 -0400
Subject: [PATCH] Fix MTP with deepep_low_latency (#25904)

Signed-off-by: Matthew Bonanni
Signed-off-by: yewentao256
---
 vllm/model_executor/layers/fused_moe/layer.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 8de1d14d46b33..9a7ca7b6d1240 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -1899,6 +1899,15 @@ class FusedMoE(CustomOp):
             staged_hidden_states.copy_(hidden_states, non_blocking=True)
             staged_router_logits.copy_(router_logits, non_blocking=True)
 
+            # If there are shared experts but we are not using a modular kernel,
+            # the shared experts must be called here
+            if (not isinstance(self.quant_method.fused_experts,
+                               FusedMoEModularKernel)
+                    and self.shared_experts is not None):
+                shared_output = self.shared_experts(staged_hidden_states)
+            else:
+                shared_output = None
+
             # Matrix multiply.
             final_hidden_states = self.quant_method.apply(
                 layer=self,
@@ -1922,8 +1931,13 @@ class FusedMoE(CustomOp):
                 logical_replica_count=self.logical_replica_count,
             )
 
-            assert self.shared_experts is None or isinstance(
-                final_hidden_states, tuple)
+            if shared_output is not None:
+                assert not isinstance(final_hidden_states, tuple)
+                assert self.shared_experts is not None
+                final_hidden_states = (
+                    shared_output,
+                    final_hidden_states,
+                )
 
             if self.zero_expert_num is not None and self.zero_expert_num > 0:
                 assert isinstance(final_hidden_states, tuple)
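
Note (illustration, not part of the patch): the sketch below models the control flow the two hunks introduce. Per the patch's own comment, when shared experts exist but the fused_experts implementation is not a FusedMoEModularKernel, the shared experts must be called explicitly, and their output re-packed into the (shared, routed) tuple that downstream code asserts on. The class and function names in the sketch are hypothetical stand-ins, not vLLM APIs; only the branching mirrors the patched code.

# --- Illustrative sketch (assumptions: simplified stand-in kernels) ---
import torch


class _ModularKernel:
    # Stand-in for FusedMoEModularKernel: it handles the shared experts
    # itself and returns a (shared, routed) tuple.
    def __call__(self, x: torch.Tensor):
        return x * 0.5, x * 2.0


class _PlainKernel:
    # Stand-in for a non-modular fused_experts path: routed output only.
    def __call__(self, x: torch.Tensor):
        return x * 2.0


def forward_chunk(fused_experts, shared_experts, staged_hidden_states):
    # Non-modular kernels do not run the shared experts, so when shared
    # experts exist they are invoked explicitly before the routed-expert
    # matmul (mirrors the first hunk).
    if (not isinstance(fused_experts, _ModularKernel)
            and shared_experts is not None):
        shared_output = shared_experts(staged_hidden_states)
    else:
        shared_output = None

    final_hidden_states = fused_experts(staged_hidden_states)

    # Re-pack into the (shared, routed) tuple downstream code expects
    # (mirrors the second hunk).
    if shared_output is not None:
        assert not isinstance(final_hidden_states, tuple)
        final_hidden_states = (shared_output, final_hidden_states)
    return final_hidden_states


if __name__ == "__main__":
    x = torch.ones(4, 8)
    shared = lambda t: t + 1.0
    # Both kernel flavors end up producing a (shared, routed) tuple.
    assert isinstance(forward_chunk(_PlainKernel(), shared, x), tuple)
    assert isinstance(forward_chunk(_ModularKernel(), shared, x), tuple)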