mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 09:15:55 +08:00
Fix FA2 fallback for Blackwell V1 (#19781)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
dfada85eee
commit
36239f79dd
@@ -255,7 +255,7 @@ class CudaPlatformBase(Platform):
 "install FlashInfer for better performance.")
 pass
 # FlashAttention is the default for SM 8.0+ GPUs
-elif cls.has_device_capability(80):
+if cls.has_device_capability(80):
 logger.info_once("Using Flash Attention backend on V1 engine.")
 return ("vllm.v1.attention.backends."
 "flash_attn.FlashAttentionBackend")
Loading…
x
Reference in New Issue
Block a user