[Perf] Enable separate shared_experts stream only for CUDA (#30085)

Signed-off-by: Alexander Matveev <amatveev@redhat.com>
This commit is contained in:
Alexander Matveev 2025-12-04 19:03:17 -05:00 committed by GitHub
parent 690cc3ef20
commit 4470ee2f90
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -863,7 +863,8 @@ class FusedMoE(CustomOp):
use_chunked_impl: bool,
) -> tuple[bool, torch.Tensor | None]:
use_shared_experts_stream = (
-has_separate_shared_experts
+current_platform.is_cuda()
+and has_separate_shared_experts
and not use_chunked_impl
and self.shared_experts_stream is not None
and (