[Bugfix] Fix incorrect use of hidden_states for shared_experts due to do_naive_dispatch_combine (#28740)
Signed-off-by: Alexander Matveev <amatveev@redhat.com>
Parent: 2e0ad629b0
Commit: e5c78956c0
@@ -1749,14 +1749,16 @@ class FusedMoE(CustomOp):
         with sp_ctx:
             if do_naive_dispatch_combine:
-                hidden_states, router_logits = get_ep_group().dispatch(
+                hidden_states_combined, router_logits = get_ep_group().dispatch(
                     hidden_states, router_logits, self.is_sequence_parallel
                 )

             # Matrix multiply.
             final_hidden_states = self.quant_method.apply(
                 layer=self,
-                x=hidden_states,
+                x=hidden_states_combined
+                if do_naive_dispatch_combine
+                else hidden_states,
                 router_logits=router_logits,
                 top_k=self.top_k,
                 renormalize=self.renormalize,
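Why the rename matters: with do_naive_dispatch_combine, get_ep_group().dispatch() returns a new dispatched tensor, but the shared experts later in the forward pass must still operate on the original per-rank hidden_states. Rebinding the name hidden_states to the dispatch output silently fed the dispatched tensor to the shared experts; binding it to hidden_states_combined keeps the original alive. Below is a minimal sketch of the pattern, not vLLM's actual code: dispatch, routed_experts, and shared_experts are hypothetical stand-ins for the real dispatch/quant_method/shared-expert calls.

import torch

def moe_forward(
    hidden_states: torch.Tensor,
    router_logits: torch.Tensor,
    dispatch,                    # hypothetical: (h, logits) -> (h', logits')
    routed_experts,              # hypothetical: (h', logits') -> torch.Tensor
    shared_experts,              # hypothetical: h -> torch.Tensor
    do_naive_dispatch_combine: bool,
) -> torch.Tensor:
    if do_naive_dispatch_combine:
        # Fix: bind the dispatch output to a NEW name so the original
        # `hidden_states` survives for the shared-expert branch. The old
        # code rebound `hidden_states` itself, which was the bug.
        hidden_states_combined, router_logits = dispatch(hidden_states, router_logits)
    routed_out = routed_experts(
        # The conditional only evaluates `hidden_states_combined` when it
        # was actually assigned above, so there is no NameError here.
        hidden_states_combined if do_naive_dispatch_combine else hidden_states,
        router_logits,
    )
    # Shared experts always see the original, pre-dispatch activations.
    return routed_out + shared_experts(hidden_states)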