Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[BugFix][FlashInfer] Fix attention backend interface mismatch with unexpected keyword use_irope (#19134)
Signed-off-by: Yunqiu Guo <guorachel@meta.com>
This commit is contained in:
Parent: 5f1ac1e1d1
Commit: 467bef18a3
@@ -508,7 +508,12 @@ class FlashInferImpl(AttentionImpl):
         logits_soft_cap: Optional[float] = None,
         attn_type: AttentionType = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[int] = None,
+        use_irope: bool = False,
     ) -> None:
+        if use_irope:
+            logger.warning_once(
+                "Using irope in FlashInfer is not supported yet, it will fall"
+                " back to global attention for long context.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
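For context, here is a minimal standalone sketch of the interface mismatch this commit resolves. The class and parameter names below are hypothetical illustrations, not vLLM's actual backend code: the idea is that the model layer forwards use_irope to every attention backend's constructor, so a backend whose __init__ does not accept that keyword fails with an unexpected-keyword TypeError. Adding use_irope: bool = False (and warning that FlashInfer falls back to global attention) restores the shared constructor interface.

# Hypothetical sketch of the mismatch; not vLLM's real backend classes.

class BackendWithoutIrope:
    # Pre-fix shape: __init__ does not accept use_irope at all.
    def __init__(self, num_heads: int, head_size: int, scale: float) -> None:
        self.num_heads = num_heads
        self.head_size = head_size
        self.scale = float(scale)


class BackendWithIrope:
    # Post-fix shape: use_irope is accepted with a default, and an
    # unsupported request falls back to ordinary (global) attention.
    def __init__(
        self,
        num_heads: int,
        head_size: int,
        scale: float,
        use_irope: bool = False,
    ) -> None:
        if use_irope:
            print("use_irope requested but unsupported; using global attention")
        self.num_heads = num_heads
        self.head_size = head_size
        self.scale = float(scale)


if __name__ == "__main__":
    # The caller builds one kwargs dict and passes it to whichever backend
    # is selected, which is why every backend must accept the same keywords.
    kwargs = dict(num_heads=32, head_size=128, scale=128 ** -0.5, use_irope=True)

    BackendWithIrope(**kwargs)  # accepted after the fix

    try:
        BackendWithoutIrope(**kwargs)  # reproduces the pre-fix failure
    except TypeError as err:
        print(f"TypeError: {err}")  # unexpected keyword argument 'use_irope'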