From f9f3b596f374c4a01acef275ee1f35398bb05164 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Thu, 13 Nov 2025 12:20:01 -0600
Subject: [PATCH] [Attention][Bugfix] Fix FA sink support (#28660)

Signed-off-by: Matthew Bonanni
---
 vllm/v1/attention/backends/flash_attn.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index bfb4a45c2b56..81623549ae85 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -130,6 +130,12 @@ class FlashAttentionBackend(AttentionBackend):
             return flash_attn_supports_fp8()
         return kv_cache_dtype in ["auto"]
 
+    @classmethod
+    def supports_sink(cls) -> bool:
+        if not is_flash_attn_varlen_func_available():
+            return False
+        return flash_attn_supports_sinks()
+
     @classmethod
     def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
         return capability >= DeviceCapability(8, 0)
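
Note (not part of the patch): the new supports_sink() classmethod is a capability probe, analogous to the existing supports_kv_cache_dtype() and supports_compute_capability() checks. Below is a minimal sketch of how such a probe could be consulted when deciding whether FlashAttention can serve a model that uses attention sinks. The select_backend helper and the needs_sinks flag are assumptions for illustration only; they are not vLLM's actual backend-selection code.

# Hypothetical illustration only. Uses FlashAttentionBackend.supports_sink()
# from the patch above; select_backend and needs_sinks are made-up names.
from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend


def select_backend(needs_sinks: bool):
    # If the model requires attention sinks but the installed FlashAttention
    # build cannot handle them, return None so the caller can fall back to
    # another backend.
    if needs_sinks and not FlashAttentionBackend.supports_sink():
        return None
    return FlashAttentionBackend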