mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 13:35:54 +08:00
[Model] Fix bailing_moe accuracy problem (#28277)
Signed-off-by: zhaozx-cn <zhaozx2116@163.com>
This commit is contained in:
parent
8d3748d3c7
commit
433c0f8675
@@ -39,7 +39,6 @@ from vllm.distributed import (
|
||||
get_pp_group,
|
||||
get_tensor_model_parallel_rank,
|
||||
get_tensor_model_parallel_world_size,
|
||||
- tensor_model_parallel_all_reduce,
|
||||
)
|
||||
from vllm.model_executor.layers.activation import SiluAndMul
|
||||
from vllm.model_executor.layers.fused_moe import SharedFusedMoE
|
||||
@@ -330,7 +329,9 @@ class BailingMoE(nn.Module):
|
||||
final_hidden_states = final_hidden_states + shared_output
|
||||
|
||||
if self.tp_size > 1:
|
||||
- final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
|
||||
+ final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
|
||||
+ final_hidden_states
|
||||
+ )
|
||||
return final_hidden_states.view(num_tokens, hidden_size)
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user