From 40d33264c680a8c725b93db6ccce608f99e5c7da Mon Sep 17 00:00:00 2001
From: Sage Moore
Date: Mon, 10 Nov 2025 12:39:19 -0800
Subject: [PATCH] [Bugfix][EPLB] Disabled shared expert overlap when EPLB is
 enabled (#28377)

Signed-off-by: Sage Moore
Signed-off-by: Sage Moore
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
---
 .../layers/fused_moe/shared_fused_moe.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index 6b4a0b8cf0730..3d0c5636d6c0a 100644
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -28,13 +28,18 @@ class SharedFusedMoE(FusedMoE):
         super().__init__(**kwargs)
         self._shared_experts = shared_experts
 
-        # Disable shared expert overlap if we are not using
-        # flashinfer + DP since there is nothing to be gained in this case.
-        # Disabling the overlap optimization also prevents the shared experts
-        # from being hidden from torch.compile.
+        # Disable shared expert overlap if we are using eplb, because of
+        # correctness issues, or if using flashinfer with DP, since there
+        # is nothing to be gained in this case. Disabling the overlap
+        # optimization also prevents the shared experts from being hidden
+        # from torch.compile.
         self.use_overlapped = (
             use_overlapped
-            and not (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
+            and not (
+                # TODO(wentao): find the root cause and remove this condition
+                self.enable_eplb
+                or (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
+            )
             and self._shared_experts is not None
         )
 
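
A minimal sketch of the gating logic after this change, not part of the patch: the flag names (enable_eplb, use_flashinfer_cutlass_kernels, dp_size) come from the diff, while the standalone helper itself is hypothetical and only illustrates when shared expert overlap remains enabled.

# Hypothetical helper mirroring the condition introduced in this patch.
# The flag names come from the diff; this function is illustrative only
# and is not part of vLLM.
def should_overlap_shared_experts(
    use_overlapped: bool,
    enable_eplb: bool,
    use_flashinfer_cutlass_kernels: bool,
    dp_size: int,
    has_shared_experts: bool,
) -> bool:
    # Overlap is skipped when EPLB is enabled (correctness workaround) or
    # when flashinfer cutlass kernels run with data parallelism (no gain).
    return (
        use_overlapped
        and not (enable_eplb or (use_flashinfer_cutlass_kernels and dp_size > 1))
        and has_shared_experts
    )

# EPLB enabled forces overlap off even if everything else would allow it.
assert should_overlap_shared_experts(True, True, False, 1, True) is False
assert should_overlap_shared_experts(True, False, False, 1, True) is True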