Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 05:25:00 +08:00)
[Attention][Bugfix] Fix FA sink support (#28660)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent 119c4927b3
commit f9f3b596f3
@@ -130,6 +130,12 @@ class FlashAttentionBackend(AttentionBackend):
             return flash_attn_supports_fp8()
         return kv_cache_dtype in ["auto"]
 
+    @classmethod
+    def supports_sink(cls) -> bool:
+        if not is_flash_attn_varlen_func_available():
+            return False
+        return flash_attn_supports_sinks()
+
     @classmethod
     def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
         return capability >= DeviceCapability(8, 0)
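For context, a minimal, self-contained sketch of how capability probes like these can gate backend selection. DeviceCapability below is a simplified stand-in for vLLM's real class, the two probe functions are placeholder stubs for the helpers the diff references, and can_use_flash_attention is a hypothetical helper, not vLLM's actual selection logic:

from dataclasses import dataclass


@dataclass(order=True, frozen=True)
class DeviceCapability:
    # order=True compares (major, minor) lexicographically, matching how
    # CUDA SM versions are ordered.
    major: int
    minor: int


def is_flash_attn_varlen_func_available() -> bool:
    # Placeholder stub: stands in for vLLM's probe of whether the varlen
    # flash-attention kernel can be imported in this environment.
    return True


def flash_attn_supports_sinks() -> bool:
    # Placeholder stub: stands in for vLLM's check of whether the installed
    # flash-attn build supports attention sinks.
    return True


class FlashAttentionBackend:
    @classmethod
    def supports_sink(cls) -> bool:
        # Same pattern as the diff: without the varlen kernel, sinks cannot
        # be supported regardless of what the flash-attn build reports.
        if not is_flash_attn_varlen_func_available():
            return False
        return flash_attn_supports_sinks()

    @classmethod
    def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
        # FlashAttention requires SM 8.0 (Ampere) or newer.
        return capability >= DeviceCapability(8, 0)


def can_use_flash_attention(capability: DeviceCapability, needs_sink: bool) -> bool:
    # Hypothetical helper: a model that uses attention sinks should only
    # select a backend whose supports_sink() probe passes.
    if not FlashAttentionBackend.supports_compute_capability(capability):
        return False
    return FlashAttentionBackend.supports_sink() if needs_sink else True


print(can_use_flash_attention(DeviceCapability(9, 0), needs_sink=True))  # True

Exposing both checks as classmethods lets the selection code reject a backend before instantiating it, which is why the fix adds supports_sink() alongside the existing supports_compute_capability() probe.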