From 41ca7eb49192cbee65986527319c6eb929d6aa7c Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Thu, 24 Apr 2025 23:12:21 -0400 Subject: [PATCH] [Attention] FA3 decode perf improvement - single mma warp group support for head dim 128 (#16864) Signed-off-by: Lucas Wilkinson --- cmake/external_projects/vllm_flash_attn.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/external_projects/vllm_flash_attn.cmake b/cmake/external_projects/vllm_flash_attn.cmake index 110ef266c665..c60a54745519 100644 --- a/cmake/external_projects/vllm_flash_attn.cmake +++ b/cmake/external_projects/vllm_flash_attn.cmake @@ -38,7 +38,7 @@ else() FetchContent_Declare( vllm-flash-attn GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git - GIT_TAG 0a721daebe4fa7149f06ecf3d3eabeb6dcd0f1fa + GIT_TAG e93779c59ba4905e56e5c39dc2c1904ada71fa21 GIT_PROGRESS TRUE # Don't share the vllm-flash-attn build between build types BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn