From 59bd5f6a718a309517343e126f5086e057227992 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Tue, 16 Dec 2025 10:33:52 -0500
Subject: [PATCH] [Feat] Enable eplb with default all2all backend (#30559)

Signed-off-by: yewentao256
---
 vllm/model_executor/layers/fused_moe/shared_fused_moe.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index 60aa1c088b4d8..a143347b19f2c 100644
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -29,14 +29,14 @@ class SharedFusedMoE(FusedMoE):
         self._shared_experts = shared_experts

         # Disable shared expert overlap if:
-        # - we are using eplb, because of correctness issues
-        # - we are using flashinfer with DP, since there nothing to gain
+        # - we are using eplb with a non-default backend, because of correctness issues
+        # - we are using flashinfer with DP, since there is nothing to gain
         # - we are using marlin kernels
+        backend = self.moe_parallel_config.all2all_backend
         self.use_overlapped = (
             use_overlapped
             and not (
-                # TODO(wentao): find the root cause and remove this condition
-                self.enable_eplb
+                (self.enable_eplb and backend != "allgather_reducescatter")
                 or (self.moe_config.use_flashinfer_cutlass_kernels and self.dp_size > 1)
             )
             and self._shared_experts is not None
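
Note (illustrative, not part of the patch): the gating logic above can be read as a small
standalone predicate. The sketch below mirrors the patched condition; the attribute names
(enable_eplb, all2all_backend, use_flashinfer_cutlass_kernels, dp_size) come from the diff,
while the helper function itself is hypothetical and not vLLM code.

    def should_overlap_shared_experts(
        use_overlapped: bool,
        enable_eplb: bool,
        all2all_backend: str,
        use_flashinfer_cutlass_kernels: bool,
        dp_size: int,
        has_shared_experts: bool,
    ) -> bool:
        # EPLB now only blocks overlap when a non-default all2all backend is used.
        eplb_blocks = enable_eplb and all2all_backend != "allgather_reducescatter"
        # FlashInfer cutlass kernels with DP > 1 gain nothing from overlap.
        flashinfer_dp_blocks = use_flashinfer_cutlass_kernels and dp_size > 1
        return (
            use_overlapped
            and not (eplb_blocks or flashinfer_dp_blocks)
            and has_shared_experts
        )

    # After this patch, EPLB with the default backend keeps overlap enabled.
    assert should_overlap_shared_experts(
        use_overlapped=True,
        enable_eplb=True,
        all2all_backend="allgather_reducescatter",
        use_flashinfer_cutlass_kernels=False,
        dp_size=1,
        has_shared_experts=True,
    )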