[Bugfix] Fix incorrect use of hidden_states for shared_experts due to do_naive_dispatch_combine (#28740)
Signed-off-by: Alexander Matveev <amatveev@redhat.com>
commit e5c78956c0 (parent 2e0ad629b0)
@@ -1749,14 +1749,16 @@ class FusedMoE(CustomOp):
         with sp_ctx:
             if do_naive_dispatch_combine:
-                hidden_states, router_logits = get_ep_group().dispatch(
+                hidden_states_combined, router_logits = get_ep_group().dispatch(
                     hidden_states, router_logits, self.is_sequence_parallel
                 )
 
             # Matrix multiply.
             final_hidden_states = self.quant_method.apply(
                 layer=self,
-                x=hidden_states,
+                x=hidden_states_combined
+                if do_naive_dispatch_combine
+                else hidden_states,
                 router_logits=router_logits,
                 top_k=self.top_k,
                 renormalize=self.renormalize,
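The diff above fixes an aliasing bug: when do_naive_dispatch_combine was enabled, the dispatch result was bound back to the name hidden_states, so any later consumer of the original activations (the shared_experts branch named in the commit title) saw the dispatched tensor instead. Below is a minimal, self-contained sketch of that pattern; dispatch, routed_experts, and shared_experts are hypothetical stand-ins for get_ep_group().dispatch(), quant_method.apply(), and the shared-expert branch, not vLLM's actual forward path.

import torch

def dispatch(h: torch.Tensor, logits: torch.Tensor):
    # EP dispatch may pad/reorder tokens across ranks, so its output is
    # not a drop-in replacement for the original activations. (Here we
    # just duplicate rows to make the shape change visible.)
    return torch.cat([h, h], dim=0), torch.cat([logits, logits], dim=0)

def routed_experts(h: torch.Tensor, logits: torch.Tensor) -> torch.Tensor:
    return h * 2.0  # placeholder for the fused routed-expert kernel

def shared_experts(h: torch.Tensor) -> torch.Tensor:
    return h + 1.0  # placeholder for the shared-expert MLP

def forward(hidden_states, router_logits, do_naive_dispatch_combine=True):
    if do_naive_dispatch_combine:
        # The fix: bind the dispatch result to a new name so the
        # original hidden_states stays intact for shared_experts.
        hidden_states_combined, router_logits = dispatch(
            hidden_states, router_logits
        )
    routed_out = routed_experts(
        hidden_states_combined if do_naive_dispatch_combine else hidden_states,
        router_logits,
    )
    # Before the fix, this call received the dispatched (reordered,
    # padded) tensor because hidden_states had been overwritten.
    shared_out = shared_experts(hidden_states)
    return routed_out, shared_out

h = torch.randn(4, 8)
logits = torch.randn(4, 2)
routed_out, shared_out = forward(h, logits)
assert shared_out.shape == h.shape  # shared experts see the original tokens

Renaming the dispatch output rather than restructuring the control flow keeps the change minimal: the routed-expert path selects between the two tensors explicitly, and every other reader of hidden_states is unaffected.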