mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 00:45:36 +08:00
[Model] Fix bailing_moe accuracy problem (#28277)
Signed-off-by: zhaozx-cn <zhaozx2116@163.com>
This commit is contained in:
parent
8d3748d3c7
commit
433c0f8675
@@ -39,7 +39,6 @@ from vllm.distributed import (
     get_pp_group,
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe import SharedFusedMoE
@@ -330,7 +329,9 @@ class BailingMoE(nn.Module):
             final_hidden_states = final_hidden_states + shared_output
 
         if self.tp_size > 1:
-            final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
+            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
+                final_hidden_states
+            )
         return final_hidden_states.view(num_tokens, hidden_size)
 
 
Loading…
x
Reference in New Issue
Block a user