Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Bugfix] Flash attention arches not getting set properly (#9062)
Commit 22482e495e (parent 3d826d2c52)
@@ -482,6 +482,17 @@ if (NOT VLLM_TARGET_DEVICE STREQUAL "cuda")
   return()
 endif ()
 
+# vLLM flash attention requires VLLM_GPU_ARCHES to contain the set of target
+# arches in the CMake syntax (75-real, 89-virtual, etc), since we clear the
+# arches in the CUDA case (and instead set the gencodes on a per file basis)
+# we need to manually set VLLM_GPU_ARCHES here.
+if(VLLM_GPU_LANG STREQUAL "CUDA")
+  foreach(_ARCH ${CUDA_ARCHS})
+    string(REPLACE "." "" _ARCH "${_ARCH}")
+    list(APPEND VLLM_GPU_ARCHES "${_ARCH}-real")
+  endforeach()
+endif()
+
 #
 # Build vLLM flash attention from source
 #