[Model] Fix bailing_moe accuracy problem (#28277)

Signed-off-by: zhaozx-cn <zhaozx2116@163.com>
This commit is contained in:
zhaozx-cn 2025-11-14 21:33:02 +08:00 committed by GitHub
parent 8d3748d3c7
commit 433c0f8675
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -39,7 +39,6 @@ from vllm.distributed import (
 get_pp_group,
 get_tensor_model_parallel_rank,
 get_tensor_model_parallel_world_size,
-tensor_model_parallel_all_reduce,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe import SharedFusedMoE
@@ -330,7 +329,9 @@ class BailingMoE(nn.Module):
 final_hidden_states = final_hidden_states + shared_output
 if self.tp_size > 1:
-final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
+final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
+final_hidden_states
+)
 return final_hidden_states.view(num_tokens, hidden_size)