[BugFix][FlashInfer] Fix attention backend interface mismatch with unexpected keyword use_irope (#19134)

Signed-off-by: Yunqiu Guo <guorachel@meta.com>
Author: Rachel Guo, 2025-06-10 01:48:51 -07:00 (committed by GitHub)
parent 5f1ac1e1d1
commit 467bef18a3


@@ -508,7 +508,12 @@ class FlashInferImpl(AttentionImpl):
         logits_soft_cap: Optional[float] = None,
         attn_type: AttentionType = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[int] = None,
+        use_irope: bool = False,
     ) -> None:
+        if use_irope:
+            logger.warning_once(
+                "Using irope in FlashInfer is not supported yet, it will fall"
+                " back to global attention for long context.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
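
Context for the fix: vLLM constructs every attention backend through the shared AttentionImpl interface and forwards the same keyword set to each constructor, so a backend whose __init__ does not accept use_irope fails with a TypeError before any attention runs. Accepting the keyword, and warning that it is unsupported, is the minimal fix. Below is a runnable sketch of the failure mode; the class names BrokenBackend and FixedBackend are illustrative stand-ins, not vLLM's actual classes.

import logging

logger = logging.getLogger(__name__)

class BrokenBackend:
    # Pre-fix shape: __init__ does not accept use_irope.
    def __init__(self, num_heads: int, head_size: int) -> None:
        self.num_heads = num_heads
        self.head_size = head_size

class FixedBackend:
    # Post-fix shape: the keyword is accepted, warned about, and otherwise ignored.
    def __init__(self, num_heads: int, head_size: int,
                 use_irope: bool = False) -> None:
        if use_irope:
            logger.warning("irope is not supported here; falling back to "
                           "global attention.")
        self.num_heads = num_heads
        self.head_size = head_size

# The caller passes one keyword set to every backend.
common_kwargs = {"num_heads": 8, "head_size": 64, "use_irope": True}

FixedBackend(**common_kwargs)       # works: the flag is consumed by __init__
try:
    BrokenBackend(**common_kwargs)  # the pre-fix crash this commit addresses
except TypeError as err:
    print(err)  # ... got an unexpected keyword argument 'use_irope'

The real patch uses logger.warning_once rather than logger.warning so the fallback message is emitted only once rather than per layer.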