From 467bef18a353e8ca133a0a5611d6f4384ea351b6 Mon Sep 17 00:00:00 2001
From: Rachel Guo <35738743+YUNQIUGUO@users.noreply.github.com>
Date: Tue, 10 Jun 2025 01:48:51 -0700
Subject: [PATCH] [BugFix][FlashInfer] Fix attention backend interface mismatch
 with unexpected keyword `use_irope` (#19134)

Signed-off-by: Yunqiu Guo
---
 vllm/v1/attention/backends/flashinfer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index f1b61c152a9d..b15bb4b3152a 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -508,7 +508,12 @@ class FlashInferImpl(AttentionImpl):
         logits_soft_cap: Optional[float] = None,
         attn_type: AttentionType = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[int] = None,
+        use_irope: bool = False,
     ) -> None:
+        if use_irope:
+            logger.warning_once(
+                "Using irope in FlashInfer is not supported yet, it will fall"
+                " back to global attention for long context.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
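
For context, the underlying failure is a plain keyword-argument mismatch: the attention layer forwards `use_irope` to the backend impl's constructor, and `FlashInferImpl.__init__` did not accept that parameter, so construction raised a TypeError. The sketch below reproduces the mismatch with hypothetical stand-in classes (ImplWithoutIrope, ImplWithIrope, and build_impl are illustrative names, not vLLM's real API); the patched variant accepts the keyword, defaults it to False, and only warns when it is set.

# Minimal sketch of the failure mode this patch fixes. The names below are
# hypothetical stand-ins, not vLLM's actual classes: the caller forwards
# `use_irope` as a keyword argument, so a backend impl whose __init__ does
# not accept it fails at construction time.


class ImplWithoutIrope:
    """Simplified stand-in for the old FlashInferImpl signature (no use_irope)."""

    def __init__(self, num_heads: int, head_size: int, scale: float) -> None:
        self.num_heads = num_heads
        self.head_size = head_size
        self.scale = float(scale)


class ImplWithIrope:
    """Simplified stand-in for the patched signature: use_irope defaults to False."""

    def __init__(self,
                 num_heads: int,
                 head_size: int,
                 scale: float,
                 use_irope: bool = False) -> None:
        if use_irope:
            # The real patch logs a one-time warning and falls back to global attention.
            print("irope not supported by this backend; using global attention")
        self.num_heads = num_heads
        self.head_size = head_size
        self.scale = float(scale)


def build_impl(impl_cls, use_irope: bool):
    # The caller passes the keyword unconditionally, as the attention layer does.
    return impl_cls(num_heads=8, head_size=64, scale=0.125, use_irope=use_irope)


build_impl(ImplWithIrope, use_irope=True)  # constructs fine, prints the fallback note

try:
    build_impl(ImplWithoutIrope, use_irope=True)
except TypeError as exc:
    # Pre-patch behavior: __init__() got an unexpected keyword argument 'use_irope'
    print(f"pre-patch behavior: {exc}")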