[CI Failure] Disable FlashInfer RoPE to unblock CI (#25299)

Signed-off-by: mgoin <mgoin64@gmail.com>
Michael Goin 2025-09-20 04:16:56 -04:00 committed by GitHub
parent 3d9a1d2de5
commit e08a3a3fdb


@@ -6,8 +6,6 @@ from typing import Optional
 import torch

 from vllm.model_executor.custom_op import CustomOp
-from vllm.platforms import current_platform
-from vllm.utils.flashinfer import has_flashinfer

 from .common import apply_rotary_emb_torch
@@ -32,13 +30,15 @@ class RotaryEmbedding(CustomOp):
         self.base = base
         self.is_neox_style = is_neox_style
         self.dtype = dtype
+        # TODO(mgoin): disabled for now due to failures
         # Flashinfer only supports head_size=64, 128, 256, 512.
         # https://github.com/flashinfer-ai/flashinfer/blob/ebfd655efe830048dba5d582aaa61d61d1cf9a87/include/flashinfer/utils.cuh#L174-L202
-        self.use_flashinfer = (self.enabled()
-                               and dtype in (torch.float16, torch.bfloat16)
-                               and current_platform.is_cuda()
-                               and has_flashinfer()
-                               and self.head_size in [64, 128, 256, 512])
+        # self.use_flashinfer = (self.enabled()
+        #                        and dtype in (torch.float16, torch.bfloat16)
+        #                        and current_platform.is_cuda()
+        #                        and has_flashinfer()
+        #                        and self.head_size in [64, 128, 256, 512])
+        self.use_flashinfer = False

         cache = self._compute_cos_sin_cache()
         if not self.use_flashinfer:
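
The condition being commented out gated the FlashInfer RoPE path on dtype, platform, FlashInfer availability, and head size. Below is a minimal standalone sketch of that gate for illustration only: vLLM's `has_flashinfer` helper is stubbed here as an import probe, `current_platform.is_cuda()` is approximated by `torch.cuda.is_available()`, and the `self.enabled()` CustomOp check is omitted, so this is not the exact vLLM code path.

import torch

# Head sizes supported by FlashInfer's RoPE kernels (per flashinfer's utils.cuh).
FLASHINFER_HEAD_SIZES = (64, 128, 256, 512)


def has_flashinfer() -> bool:
    # Stub approximating vLLM's helper: report whether flashinfer is importable.
    try:
        import flashinfer  # noqa: F401
    except ImportError:
        return False
    return True


def flashinfer_rope_supported(dtype: torch.dtype, head_size: int) -> bool:
    # Mirrors the disabled condition: fp16/bf16 only, CUDA available,
    # flashinfer installed, and a supported head size.
    return (dtype in (torch.float16, torch.bfloat16)
            and torch.cuda.is_available()
            and has_flashinfer()
            and head_size in FLASHINFER_HEAD_SIZES)


print(flashinfer_rope_supported(torch.bfloat16, 128))  # True only with CUDA + flashinfer
print(flashinfer_rope_supported(torch.float32, 128))   # False: unsupported dtype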