[Attention] FA3 decode perf improvement - single mma warp group support for head dim 128 (#16864)

Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
Lucas Wilkinson 2025-04-24 23:12:21 -04:00 committed by GitHub
parent eef364723c
commit 41ca7eb491
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -38,7 +38,7 @@ else()
FetchContent_Declare(
vllm-flash-attn
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
- GIT_TAG 0a721daebe4fa7149f06ecf3d3eabeb6dcd0f1fa
+ GIT_TAG e93779c59ba4905e56e5c39dc2c1904ada71fa21
GIT_PROGRESS TRUE
# Don't share the vllm-flash-attn build between build types
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn