[Feat] Enable eplb with default all2all backend (#30559)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2026-07-02 00:27:18 +08:00 · 2025-12-16 10:33:52 -05:00 · 2025-12-16 10:33:52 -05:00 · 59bd5f6a71
commit 59bd5f6a71
parent 00a8d7628c
1 changed files with 4 additions and 4 deletions
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -29,14 +29,14 @@ class SharedFusedMoE(FusedMoE):
        self._shared_experts = shared_experts

        # Disable shared expert overlap if:
-        #   - we are using eplb, because of correctness issues
-        #   - we are using flashinfer with DP, since there nothing to gain
+        #   - we are using eplb with non-default backend, because of correctness issues
+        #   - we are using flashinfer with DP, since there is nothing to gain
        #   - we are using marlin kernels
+        backend = self.moe_parallel_config.all2all_backend
        self.use_overlapped = (
            use_overlapped
            and not (
-                # TODO(wentao): find the root cause and remove this condition
-                self.enable_eplb
+                (self.enable_eplb and backend != "allgather_reducescatter")
                or (self.moe_config.use_flashinfer_cutlass_kernels and self.dp_size > 1)
            )
            and self._shared_experts is not None