Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Bugfix] Flash attention arches not getting set properly (#9062)
Commit 22482e495e (parent 3d826d2c52)
@@ -482,6 +482,17 @@ if (NOT VLLM_TARGET_DEVICE STREQUAL "cuda")
   return()
 endif ()
 
+# vLLM flash attention requires VLLM_GPU_ARCHES to contain the set of target
+# arches in the CMake syntax (75-real, 89-virtual, etc), since we clear the
+# arches in the CUDA case (and instead set the gencodes on a per file basis)
+# we need to manually set VLLM_GPU_ARCHES here.
+if(VLLM_GPU_LANG STREQUAL "CUDA")
+  foreach(_ARCH ${CUDA_ARCHS})
+    string(REPLACE "." "" _ARCH "${_ARCH}")
+    list(APPEND VLLM_GPU_ARCHES "${_ARCH}-real")
+  endforeach()
+endif()
+
 #
 # Build vLLM flash attention from source
 #