From 433c0f86751f20dbdfdeb1a711def99b7ae3df92 Mon Sep 17 00:00:00 2001 From: zhaozx-cn <59479021+zhaozx-cn@users.noreply.github.com> Date: Fri, 14 Nov 2025 21:33:02 +0800 Subject: [PATCH] [Model] Fix bailing_moe accuracy problem (#28277) Signed-off-by: zhaozx-cn --- vllm/model_executor/models/bailing_moe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/bailing_moe.py b/vllm/model_executor/models/bailing_moe.py index a87813402256..6e1e5b1ddc50 100644 --- a/vllm/model_executor/models/bailing_moe.py +++ b/vllm/model_executor/models/bailing_moe.py @@ -39,7 +39,6 @@ from vllm.distributed import ( get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, - tensor_model_parallel_all_reduce, ) from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.fused_moe import SharedFusedMoE @@ -330,7 +329,9 @@ class BailingMoE(nn.Module): final_hidden_states = final_hidden_states + shared_output if self.tp_size > 1: - final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states) + final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel( + final_hidden_states + ) return final_hidden_states.view(num_tokens, hidden_size)