From 1133c53c0e6814d07c2754d9d4049b5583ca5ab1 Mon Sep 17 00:00:00 2001
From: mgoin
Date: Wed, 24 Sep 2025 14:35:39 -0400
Subject: [PATCH] Suppress FA3 "wgmma.mma_async instructions are serialized" warning

Signed-off-by: mgoin
---
NOTE(review): the hunk below appends to CMAKE_CUDA_FLAGS *after*
FetchContent_MakeAvailable(vllm-flash-attn). CMAKE_<LANG>_FLAGS is a
directory-scoped variable, so please confirm that the flag actually reaches
the vllm-flash-attn targets (presumably created in the fetched project's own
directory scope) and that applying it to other CUDA targets configured from
the including scope is intended -- TODO verify before merging.

 cmake/external_projects/vllm_flash_attn.cmake | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cmake/external_projects/vllm_flash_attn.cmake b/cmake/external_projects/vllm_flash_attn.cmake
index 3d32121f13ac2..8e5908889f7e5 100644
--- a/cmake/external_projects/vllm_flash_attn.cmake
+++ b/cmake/external_projects/vllm_flash_attn.cmake
@@ -62,6 +62,12 @@ install(CODE "set(CMAKE_INSTALL_PREFIX \"\${CMAKE_INSTALL_PREFIX}/vllm/\")" ALL_
 FetchContent_MakeAvailable(vllm-flash-attn)
 message(STATUS "vllm-flash-attn is available at ${vllm-flash-attn_SOURCE_DIR}")
 
+# Suppress ptxas warnings for flash attention compilation
+if(VLLM_GPU_LANG STREQUAL "CUDA")
+  # Add ptxas flags to suppress C7520 warnings about wgmma.mma_async serialization
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas --diag_suppress=7520")
+endif()
+
 # Restore the install prefix
 install(CODE "set(CMAKE_INSTALL_PREFIX \"\${OLD_CMAKE_INSTALL_PREFIX}\")" ALL_COMPONENTS)
 install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)