Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-21 07:25:02 +08:00)
[Bugfix] use flash attn on sm90 (#22933)
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
parent 919234fe17
commit 39cd09dc86
@@ -316,7 +316,7 @@ class CudaPlatformBase(Platform):
             # FlashAttention is the default for SM 8.0+ GPUs
             if cls.has_device_capability(80):
-                if has_sink:
+                if has_sink and not cls.is_device_capability(90):
                     logger.info_once("Using Triton backend on V1 engine.")
                     return TRITON_ATTN_VLLM_V1
                 if is_default_backend_supported := is_attn_backend_supported(
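For clarity, below is a minimal standalone sketch of the backend-selection behavior after this change. It is not vLLM's actual code: the function `select_attn_backend`, the integer `device_capability` parameter, and the string backend names are illustrative stand-ins for the real `CudaPlatformBase` classmethod and its backend constants.

```python
# Minimal sketch (illustrative, not vLLM's actual implementation) of how the
# attention-backend choice behaves after this commit on SM 8.0+ GPUs.

def select_attn_backend(device_capability: int, has_sink: bool) -> str:
    """Pick an attention backend for SM 8.0+ GPUs.

    device_capability: compute capability as an integer, e.g. 80, 89, 90.
    has_sink: whether the model uses attention sinks.
    """
    if device_capability >= 80:
        # Before this commit, any model with attention sinks fell back to the
        # Triton backend. After it, SM90 (Hopper) keeps FlashAttention even
        # with sinks; only non-SM90 GPUs take the Triton fallback.
        if has_sink and device_capability != 90:
            return "TRITON_ATTN_VLLM_V1"
        return "FLASH_ATTN_VLLM_V1"
    return "OTHER_BACKEND"


# Hopper (SM90) with attention sinks now stays on FlashAttention.
assert select_attn_backend(90, True) == "FLASH_ATTN_VLLM_V1"
# An Ampere GPU (SM80) with sinks still falls back to Triton.
assert select_attn_backend(80, True) == "TRITON_ATTN_VLLM_V1"
```

The apparent rationale, consistent with the commit title, is that the FlashAttention path used on SM90 can handle models with attention sinks, so the Triton fallback is only needed on other SM 8.0+ architectures.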