[Perf] Enable separate shared_experts stream only for CUDA (#30085)

Signed-off-by: Alexander Matveev <amatveev@redhat.com>
2026-01-23 11:24:31 +08:00 · 2025-12-04 19:03:17 -05:00 · 2025-12-04 19:03:17 -05:00 · 4470ee2f90
commit 4470ee2f90
parent 690cc3ef20
1 changed file with 2 additions and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -863,7 +863,8 @@ class FusedMoE(CustomOp):
        use_chunked_impl: bool,
    ) -> tuple[bool, torch.Tensor | None]:
        use_shared_experts_stream = (
-            has_separate_shared_experts
+            current_platform.is_cuda()
+            and has_separate_shared_experts
            and not use_chunked_impl
            and self.shared_experts_stream is not None
            and (