mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-27 22:17:40 +08:00
Refactor CPU/GPU extension targets for CMake build (#28026)
Signed-off-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
This commit is contained in:
parent
934a9c3b79
commit
d15afc1fd0
@ -241,7 +241,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
message(STATUS "Enabling cumem allocator extension.")
|
||||
# link against cuda driver library
|
||||
list(APPEND CUMEM_LIBS CUDA::cuda_driver)
|
||||
define_gpu_extension_target(
|
||||
define_extension_target(
|
||||
cumem_allocator
|
||||
DESTINATION vllm
|
||||
LANGUAGE CXX
|
||||
@ -858,7 +858,7 @@ if (VLLM_GPU_LANG STREQUAL "HIP")
|
||||
endif()
|
||||
|
||||
message(STATUS "Enabling C extension.")
|
||||
define_gpu_extension_target(
|
||||
define_extension_target(
|
||||
_C
|
||||
DESTINATION vllm
|
||||
LANGUAGE ${VLLM_GPU_LANG}
|
||||
@ -973,7 +973,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
endif()
|
||||
|
||||
message(STATUS "Enabling moe extension.")
|
||||
define_gpu_extension_target(
|
||||
define_extension_target(
|
||||
_moe_C
|
||||
DESTINATION vllm
|
||||
LANGUAGE ${VLLM_GPU_LANG}
|
||||
@ -994,7 +994,7 @@ if(VLLM_GPU_LANG STREQUAL "HIP")
|
||||
"csrc/rocm/skinny_gemms.cu"
|
||||
"csrc/rocm/attention.cu")
|
||||
|
||||
define_gpu_extension_target(
|
||||
define_extension_target(
|
||||
_rocm_C
|
||||
DESTINATION vllm
|
||||
LANGUAGE ${VLLM_GPU_LANG}
|
||||
|
||||
@ -343,7 +343,7 @@ message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")
|
||||
# Define extension targets
|
||||
#
|
||||
|
||||
define_gpu_extension_target(
|
||||
define_extension_target(
|
||||
_C
|
||||
DESTINATION vllm
|
||||
LANGUAGE CXX
|
||||
@ -354,4 +354,4 @@ define_gpu_extension_target(
|
||||
WITH_SOABI
|
||||
)
|
||||
|
||||
message(STATUS "Enabling C extension.")
|
||||
message(STATUS "Enabling C extension.")
|
||||
|
||||
@ -92,7 +92,7 @@ if(FLASH_MLA_ARCHS)
|
||||
SRCS "${FlashMLA_Extension_SOURCES}"
|
||||
CUDA_ARCHS "${FLASH_MLA_ARCHS}")
|
||||
|
||||
define_gpu_extension_target(
|
||||
define_extension_target(
|
||||
_flashmla_C
|
||||
DESTINATION vllm
|
||||
LANGUAGE ${VLLM_GPU_LANG}
|
||||
@ -109,7 +109,7 @@ if(FLASH_MLA_ARCHS)
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-UPy_LIMITED_API>
|
||||
$<$<COMPILE_LANGUAGE:CXX>:-UPy_LIMITED_API>)
|
||||
|
||||
define_gpu_extension_target(
|
||||
define_extension_target(
|
||||
_flashmla_extension_C
|
||||
DESTINATION vllm
|
||||
LANGUAGE ${VLLM_GPU_LANG}
|
||||
|
||||
@ -453,21 +453,20 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
|
||||
endmacro()
|
||||
|
||||
#
|
||||
# Define a target named `GPU_MOD_NAME` for a single extension. The
|
||||
# Define a target named `MOD_NAME` for a single extension. The
|
||||
# arguments are:
|
||||
#
|
||||
# DESTINATION <dest> - Module destination directory.
|
||||
# LANGUAGE <lang> - The GPU language for this module, e.g CUDA, HIP,
|
||||
# etc.
|
||||
# LANGUAGE <lang> - The language for this module, e.g. CUDA, HIP,
|
||||
# CXX, etc.
|
||||
# SOURCES <sources> - List of source files relative to CMakeLists.txt
|
||||
# directory.
|
||||
#
|
||||
# Optional arguments:
|
||||
#
|
||||
# ARCHITECTURES <arches> - A list of target GPU architectures in cmake
|
||||
# format.
|
||||
# Refer `CMAKE_CUDA_ARCHITECTURES` documentation
|
||||
# and `CMAKE_HIP_ARCHITECTURES` for more info.
|
||||
# ARCHITECTURES <arches> - A list of target architectures in cmake format.
|
||||
# For GPU, refer to CMAKE_CUDA_ARCHITECTURES and
|
||||
# CMAKE_HIP_ARCHITECTURES for more info.
|
||||
# ARCHITECTURES will use cmake's defaults if
|
||||
# not provided.
|
||||
# COMPILE_FLAGS <flags> - Extra compiler flags applied to sources compiled as LANGUAGE (e.g. passed to NVCC, hipcc, or the C++ compiler).
|
||||
@ -478,63 +477,61 @@ endmacro()
|
||||
#
|
||||
# Note: optimization level/debug info is set via cmake build type.
|
||||
#
|
||||
function (define_gpu_extension_target GPU_MOD_NAME)
|
||||
function (define_extension_target MOD_NAME)
|
||||
cmake_parse_arguments(PARSE_ARGV 1
|
||||
GPU
|
||||
ARG
|
||||
"WITH_SOABI"
|
||||
"DESTINATION;LANGUAGE;USE_SABI"
|
||||
"SOURCES;ARCHITECTURES;COMPILE_FLAGS;INCLUDE_DIRECTORIES;LIBRARIES")
|
||||
|
||||
# Add hipify preprocessing step when building with HIP/ROCm.
|
||||
if (GPU_LANGUAGE STREQUAL "HIP")
|
||||
hipify_sources_target(GPU_SOURCES ${GPU_MOD_NAME} "${GPU_SOURCES}")
|
||||
if (ARG_LANGUAGE STREQUAL "HIP")
|
||||
hipify_sources_target(ARG_SOURCES ${MOD_NAME} "${ARG_SOURCES}")
|
||||
endif()
|
||||
|
||||
if (GPU_WITH_SOABI)
|
||||
set(GPU_WITH_SOABI WITH_SOABI)
|
||||
if (ARG_WITH_SOABI)
|
||||
set(SOABI_KEYWORD WITH_SOABI)
|
||||
else()
|
||||
set(GPU_WITH_SOABI)
|
||||
set(SOABI_KEYWORD "")
|
||||
endif()
|
||||
|
||||
if (GPU_USE_SABI)
|
||||
Python_add_library(${GPU_MOD_NAME} MODULE USE_SABI ${GPU_USE_SABI} ${GPU_WITH_SOABI} "${GPU_SOURCES}")
|
||||
if (ARG_USE_SABI)
|
||||
Python_add_library(${MOD_NAME} MODULE USE_SABI ${ARG_USE_SABI} ${SOABI_KEYWORD} "${ARG_SOURCES}")
|
||||
else()
|
||||
Python_add_library(${GPU_MOD_NAME} MODULE ${GPU_WITH_SOABI} "${GPU_SOURCES}")
|
||||
Python_add_library(${MOD_NAME} MODULE ${SOABI_KEYWORD} "${ARG_SOURCES}")
|
||||
endif()
|
||||
|
||||
if (GPU_LANGUAGE STREQUAL "HIP")
|
||||
if (ARG_LANGUAGE STREQUAL "HIP")
|
||||
# Make this target dependent on the hipify preprocessor step.
|
||||
add_dependencies(${GPU_MOD_NAME} hipify${GPU_MOD_NAME})
|
||||
add_dependencies(${MOD_NAME} hipify${MOD_NAME})
|
||||
# Make sure we include the hipified versions of the headers, and avoid conflicts with the ones in the original source folder
|
||||
target_include_directories(${GPU_MOD_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/csrc
|
||||
${GPU_INCLUDE_DIRECTORIES})
|
||||
target_include_directories(${MOD_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/csrc
|
||||
${ARG_INCLUDE_DIRECTORIES})
|
||||
else()
|
||||
target_include_directories(${GPU_MOD_NAME} PRIVATE csrc
|
||||
${GPU_INCLUDE_DIRECTORIES})
|
||||
target_include_directories(${MOD_NAME} PRIVATE csrc
|
||||
${ARG_INCLUDE_DIRECTORIES})
|
||||
endif()
|
||||
|
||||
if (GPU_ARCHITECTURES)
|
||||
set_target_properties(${GPU_MOD_NAME} PROPERTIES
|
||||
${GPU_LANGUAGE}_ARCHITECTURES "${GPU_ARCHITECTURES}")
|
||||
if (ARG_ARCHITECTURES)
|
||||
set_target_properties(${MOD_NAME} PROPERTIES
|
||||
${ARG_LANGUAGE}_ARCHITECTURES "${ARG_ARCHITECTURES}")
|
||||
endif()
|
||||
|
||||
target_compile_options(${MOD_NAME} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:${ARG_LANGUAGE}>:${ARG_COMPILE_FLAGS}>)
|
||||
|
||||
target_compile_options(${GPU_MOD_NAME} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:${GPU_LANGUAGE}>:${GPU_COMPILE_FLAGS}>)
|
||||
target_compile_definitions(${MOD_NAME} PRIVATE
|
||||
"-DTORCH_EXTENSION_NAME=${MOD_NAME}")
|
||||
|
||||
target_compile_definitions(${GPU_MOD_NAME} PRIVATE
|
||||
"-DTORCH_EXTENSION_NAME=${GPU_MOD_NAME}")
|
||||
|
||||
|
||||
target_link_libraries(${GPU_MOD_NAME} PRIVATE torch ${GPU_LIBRARIES})
|
||||
target_link_libraries(${MOD_NAME} PRIVATE torch ${ARG_LIBRARIES})
|
||||
|
||||
# Don't use `TORCH_LIBRARIES` for CUDA since it pulls in a bunch of
|
||||
# dependencies that are not necessary and may not be installed.
|
||||
if (GPU_LANGUAGE STREQUAL "CUDA")
|
||||
target_link_libraries(${GPU_MOD_NAME} PRIVATE CUDA::cudart CUDA::cuda_driver)
|
||||
if (ARG_LANGUAGE STREQUAL "CUDA")
|
||||
target_link_libraries(${MOD_NAME} PRIVATE torch CUDA::cudart CUDA::cuda_driver ${ARG_LIBRARIES})
|
||||
else()
|
||||
target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES})
|
||||
target_link_libraries(${MOD_NAME} PRIVATE torch ${TORCH_LIBRARIES} ${ARG_LIBRARIES})
|
||||
endif()
|
||||
|
||||
install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION} COMPONENT ${GPU_MOD_NAME})
|
||||
install(TARGETS ${MOD_NAME} LIBRARY DESTINATION ${ARG_DESTINATION} COMPONENT ${MOD_NAME})
|
||||
endfunction()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user