From e08a3a3fdbdb5408f904a237b31ff2447a336b2f Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Sat, 20 Sep 2025 04:16:56 -0400
Subject: [PATCH] [CI Failure] Disable FlashInfer RoPE to unblock CI (#25299)

Signed-off-by: mgoin
---
 .../model_executor/layers/rotary_embedding/base.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/vllm/model_executor/layers/rotary_embedding/base.py b/vllm/model_executor/layers/rotary_embedding/base.py
index 3dc249ae9adb9..1c3576bee5392 100644
--- a/vllm/model_executor/layers/rotary_embedding/base.py
+++ b/vllm/model_executor/layers/rotary_embedding/base.py
@@ -6,8 +6,6 @@ from typing import Optional
 import torch
 
 from vllm.model_executor.custom_op import CustomOp
-from vllm.platforms import current_platform
-from vllm.utils.flashinfer import has_flashinfer
 
 from .common import apply_rotary_emb_torch
 
@@ -32,13 +30,15 @@ class RotaryEmbedding(CustomOp):
         self.base = base
         self.is_neox_style = is_neox_style
         self.dtype = dtype
+        # TODO(mgoin): disabled for now due to failures
         # Flashinfer only supports head_size=64, 128, 256, 512.
         # https://github.com/flashinfer-ai/flashinfer/blob/ebfd655efe830048dba5d582aaa61d61d1cf9a87/include/flashinfer/utils.cuh#L174-L202
-        self.use_flashinfer = (self.enabled()
-                               and dtype in (torch.float16, torch.bfloat16)
-                               and current_platform.is_cuda()
-                               and has_flashinfer()
-                               and self.head_size in [64, 128, 256, 512])
+        # self.use_flashinfer = (self.enabled()
+        #                        and dtype in (torch.float16, torch.bfloat16)
+        #                        and current_platform.is_cuda()
+        #                        and has_flashinfer()
+        #                        and self.head_size in [64, 128, 256, 512])
+        self.use_flashinfer = False
 
         cache = self._compute_cos_sin_cache()
         if not self.use_flashinfer: