[Attention][Bugfix] Fix FA sink support (#28660)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
commit f9f3b596f3
parent 119c4927b3
@@ -130,6 +130,12 @@ class FlashAttentionBackend(AttentionBackend):
             return flash_attn_supports_fp8()
         return kv_cache_dtype in ["auto"]
 
+    @classmethod
+    def supports_sink(cls) -> bool:
+        if not is_flash_attn_varlen_func_available():
+            return False
+        return flash_attn_supports_sinks()
+
     @classmethod
     def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
         return capability >= DeviceCapability(8, 0)
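
For context, the new supports_sink() classmethod lets callers probe whether the FlashAttention backend can handle attention sinks (per-head sink logits that absorb attention probability mass) before committing to it. Below is a minimal sketch of how such a capability probe might be consumed during backend selection. The Backend base class, pick_backend helper, and needs_sink flag are hypothetical stand-ins for illustration, not vLLM's actual selection code; only the supports_sink() probe mirrors what this commit adds.

class Backend:
    @classmethod
    def supports_sink(cls) -> bool:
        # Conservative default: assume a backend cannot handle attention sinks.
        return False


class FlashAttentionBackend(Backend):
    @classmethod
    def supports_sink(cls) -> bool:
        # The real backend defers to flash_attn_supports_sinks(), which is
        # False when the installed FlashAttention build lacks sink support;
        # hard-coded True here to keep the sketch self-contained.
        return True


def pick_backend(candidates: list[type[Backend]], needs_sink: bool) -> type[Backend]:
    for backend in candidates:
        if needs_sink and not backend.supports_sink():
            continue  # skip backends whose kernels cannot consume sink logits
        return backend
    raise RuntimeError("no attention backend satisfies the model's requirements")


print(pick_backend([FlashAttentionBackend], needs_sink=True).__name__)
# -> FlashAttentionBackend

The point of the classmethod shape is that the check runs before any backend instance (or its KV cache) is constructed, so unsupported backends can be filtered out early and cheaply.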