From 59bd5f6a718a309517343e126f5086e057227992 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Tue, 16 Dec 2025 10:33:52 -0500
Subject: [PATCH] [Feat] Enable eplb with default all2all backend (#30559)

Signed-off-by: yewentao256
---
 vllm/model_executor/layers/fused_moe/shared_fused_moe.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index 60aa1c088b4d8..a143347b19f2c 100644
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -29,14 +29,14 @@ class SharedFusedMoE(FusedMoE):
         self._shared_experts = shared_experts

         # Disable shared expert overlap if:
-        # - we are using eplb, because of correctness issues
-        # - we are using flashinfer with DP, since there nothing to gain
+        # - we are using eplb with a non-default backend, because of correctness issues
+        # - we are using flashinfer with DP, since there is nothing to gain
         # - we are using marlin kernels
+        backend = self.moe_parallel_config.all2all_backend
         self.use_overlapped = (
             use_overlapped
             and not (
-                # TODO(wentao): find the root cause and remove this condition
-                self.enable_eplb
+                (self.enable_eplb and backend != "allgather_reducescatter")
                 or (self.moe_config.use_flashinfer_cutlass_kernels and self.dp_size > 1)
             )
             and self._shared_experts is not None
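
Note (illustrative, not part of the patch): the gating logic above can be read as a small
standalone predicate. The sketch below mirrors the patched condition; the attribute names
(enable_eplb, all2all_backend, use_flashinfer_cutlass_kernels, dp_size) come from the diff,
while the helper function itself is hypothetical and not vLLM code.

    def should_overlap_shared_experts(
        use_overlapped: bool,
        enable_eplb: bool,
        all2all_backend: str,
        use_flashinfer_cutlass_kernels: bool,
        dp_size: int,
        has_shared_experts: bool,
    ) -> bool:
        # EPLB now only blocks overlap when a non-default all2all backend is used.
        eplb_blocks = enable_eplb and all2all_backend != "allgather_reducescatter"
        # FlashInfer cutlass kernels with DP > 1 gain nothing from overlap.
        flashinfer_dp_blocks = use_flashinfer_cutlass_kernels and dp_size > 1
        return (
            use_overlapped
            and not (eplb_blocks or flashinfer_dp_blocks)
            and has_shared_experts
        )

    # After this patch, EPLB with the default backend keeps overlap enabled.
    assert should_overlap_shared_experts(
        use_overlapped=True,
        enable_eplb=True,
        all2all_backend="allgather_reducescatter",
        use_flashinfer_cutlass_kernels=False,
        dp_size=1,
        has_shared_experts=True,
    )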