mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 12:24:56 +08:00
[Attention] FA3 decode perf improvement - single mma warp group support for head dim 128 (#16864)
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
parent
eef364723c
commit
41ca7eb491
@ -38,7 +38,7 @@ else()
|
|||||||
FetchContent_Declare(
|
FetchContent_Declare(
|
||||||
vllm-flash-attn
|
vllm-flash-attn
|
||||||
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
|
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
|
||||||
GIT_TAG 0a721daebe4fa7149f06ecf3d3eabeb6dcd0f1fa
|
GIT_TAG e93779c59ba4905e56e5c39dc2c1904ada71fa21
|
||||||
GIT_PROGRESS TRUE
|
GIT_PROGRESS TRUE
|
||||||
# Don't share the vllm-flash-attn build between build types
|
# Don't share the vllm-flash-attn build between build types
|
||||||
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
|
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user