diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 902a77987d61a..6001b6d83c398 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -863,7 +863,8 @@ class FusedMoE(CustomOp): use_chunked_impl: bool, ) -> tuple[bool, torch.Tensor | None]: use_shared_experts_stream = ( - has_separate_shared_experts + current_platform.is_cuda() + and has_separate_shared_experts and not use_chunked_impl and self.shared_experts_stream is not None and (