mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 11:24:31 +08:00
[Perf] Enable separate shared_experts stream only for CUDA (#30085)
Signed-off-by: Alexander Matveev <amatveev@redhat.com>
This commit is contained in:
parent
690cc3ef20
commit
4470ee2f90
@@ -863,7 +863,8 @@ class FusedMoE(CustomOp):
         use_chunked_impl: bool,
     ) -> tuple[bool, torch.Tensor | None]:
         use_shared_experts_stream = (
-            has_separate_shared_experts
+            current_platform.is_cuda()
+            and has_separate_shared_experts
             and not use_chunked_impl
             and self.shared_experts_stream is not None
             and (
Loading…
x
Reference in New Issue
Block a user