diff --git a/CMakeLists.txt b/CMakeLists.txt index 7cb94f919f123..0e9fa63b178ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -241,7 +241,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") message(STATUS "Enabling cumem allocator extension.") # link against cuda driver library list(APPEND CUMEM_LIBS CUDA::cuda_driver) - define_gpu_extension_target( + define_extension_target( cumem_allocator DESTINATION vllm LANGUAGE CXX @@ -858,7 +858,7 @@ if (VLLM_GPU_LANG STREQUAL "HIP") endif() message(STATUS "Enabling C extension.") -define_gpu_extension_target( +define_extension_target( _C DESTINATION vllm LANGUAGE ${VLLM_GPU_LANG} @@ -973,7 +973,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") endif() message(STATUS "Enabling moe extension.") -define_gpu_extension_target( +define_extension_target( _moe_C DESTINATION vllm LANGUAGE ${VLLM_GPU_LANG} @@ -994,7 +994,7 @@ if(VLLM_GPU_LANG STREQUAL "HIP") "csrc/rocm/skinny_gemms.cu" "csrc/rocm/attention.cu") - define_gpu_extension_target( + define_extension_target( _rocm_C DESTINATION vllm LANGUAGE ${VLLM_GPU_LANG} diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake index 192d349b30099..dbda19fbcbf20 100644 --- a/cmake/cpu_extension.cmake +++ b/cmake/cpu_extension.cmake @@ -343,7 +343,7 @@ message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}") # Define extension targets # -define_gpu_extension_target( +define_extension_target( _C DESTINATION vllm LANGUAGE CXX @@ -354,4 +354,4 @@ define_gpu_extension_target( WITH_SOABI ) -message(STATUS "Enabling C extension.") \ No newline at end of file +message(STATUS "Enabling C extension.") diff --git a/cmake/external_projects/flashmla.cmake b/cmake/external_projects/flashmla.cmake index f661084ec48ae..2cf3c1a755d3c 100644 --- a/cmake/external_projects/flashmla.cmake +++ b/cmake/external_projects/flashmla.cmake @@ -92,7 +92,7 @@ if(FLASH_MLA_ARCHS) SRCS "${FlashMLA_Extension_SOURCES}" CUDA_ARCHS "${FLASH_MLA_ARCHS}") - define_gpu_extension_target( + define_extension_target( _flashmla_C 
DESTINATION vllm LANGUAGE ${VLLM_GPU_LANG} @@ -109,7 +109,7 @@ if(FLASH_MLA_ARCHS) $<$<COMPILE_LANGUAGE:CUDA>:-UPy_LIMITED_API> $<$<COMPILE_LANGUAGE:CXX>:-UPy_LIMITED_API>) - define_gpu_extension_target( + define_extension_target( _flashmla_extension_C DESTINATION vllm LANGUAGE ${VLLM_GPU_LANG} diff --git a/cmake/utils.cmake b/cmake/utils.cmake index c2181d4549236..ca0062ba4fabe 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -453,21 +453,20 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES) endmacro() # -# Define a target named `GPU_MOD_NAME` for a single extension. The +# Define a target named `MOD_NAME` for a single extension. The # arguments are: # # DESTINATION - Module destination directory. -# LANGUAGE - The GPU language for this module, e.g CUDA, HIP, -# etc. +# LANGUAGE - The language for this module, e.g. CUDA, HIP, +# CXX, etc. # SOURCES - List of source files relative to CMakeLists.txt # directory. # # Optional arguments: # -# ARCHITECTURES - A list of target GPU architectures in cmake -# format. -# Refer `CMAKE_CUDA_ARCHITECTURES` documentation -# and `CMAKE_HIP_ARCHITECTURES` for more info. +# ARCHITECTURES - A list of target architectures in cmake format. +# For GPU, refer to CMAKE_CUDA_ARCHITECTURES and +# CMAKE_HIP_ARCHITECTURES for more info. # ARCHITECTURES will use cmake's defaults if # not provided. # COMPILE_FLAGS - Extra compiler flags passed to NVCC/hip. @@ -478,63 +477,61 @@ endmacro() # # Note: optimization level/debug info is set via cmake build type. # -function (define_gpu_extension_target GPU_MOD_NAME) +function (define_extension_target MOD_NAME) cmake_parse_arguments(PARSE_ARGV 1 - GPU + ARG "WITH_SOABI" "DESTINATION;LANGUAGE;USE_SABI" "SOURCES;ARCHITECTURES;COMPILE_FLAGS;INCLUDE_DIRECTORIES;LIBRARIES") # Add hipify preprocessing step when building with HIP/ROCm. 
- if (GPU_LANGUAGE STREQUAL "HIP") - hipify_sources_target(GPU_SOURCES ${GPU_MOD_NAME} "${GPU_SOURCES}") + if (ARG_LANGUAGE STREQUAL "HIP") + hipify_sources_target(ARG_SOURCES ${MOD_NAME} "${ARG_SOURCES}") endif() - if (GPU_WITH_SOABI) - set(GPU_WITH_SOABI WITH_SOABI) + if (ARG_WITH_SOABI) + set(SOABI_KEYWORD WITH_SOABI) else() - set(GPU_WITH_SOABI) + set(SOABI_KEYWORD "") endif() - if (GPU_USE_SABI) - Python_add_library(${GPU_MOD_NAME} MODULE USE_SABI ${GPU_USE_SABI} ${GPU_WITH_SOABI} "${GPU_SOURCES}") + if (ARG_USE_SABI) + Python_add_library(${MOD_NAME} MODULE USE_SABI ${ARG_USE_SABI} ${SOABI_KEYWORD} "${ARG_SOURCES}") else() - Python_add_library(${GPU_MOD_NAME} MODULE ${GPU_WITH_SOABI} "${GPU_SOURCES}") + Python_add_library(${MOD_NAME} MODULE ${SOABI_KEYWORD} "${ARG_SOURCES}") endif() - if (GPU_LANGUAGE STREQUAL "HIP") + if (ARG_LANGUAGE STREQUAL "HIP") # Make this target dependent on the hipify preprocessor step. - add_dependencies(${GPU_MOD_NAME} hipify${GPU_MOD_NAME}) + add_dependencies(${MOD_NAME} hipify${MOD_NAME}) # Make sure we include the hipified versions of the headers, and avoid conflicts with the ones in the original source folder - target_include_directories(${GPU_MOD_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/csrc - ${GPU_INCLUDE_DIRECTORIES}) + target_include_directories(${MOD_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/csrc + ${ARG_INCLUDE_DIRECTORIES}) else() - target_include_directories(${GPU_MOD_NAME} PRIVATE csrc - ${GPU_INCLUDE_DIRECTORIES}) + target_include_directories(${MOD_NAME} PRIVATE csrc + ${ARG_INCLUDE_DIRECTORIES}) endif() - if (GPU_ARCHITECTURES) - set_target_properties(${GPU_MOD_NAME} PROPERTIES - ${GPU_LANGUAGE}_ARCHITECTURES "${GPU_ARCHITECTURES}") + if (ARG_ARCHITECTURES) + set_target_properties(${MOD_NAME} PROPERTIES + ${ARG_LANGUAGE}_ARCHITECTURES "${ARG_ARCHITECTURES}") endif() + target_compile_options(${MOD_NAME} PRIVATE + $<$<COMPILE_LANGUAGE:${ARG_LANGUAGE}>:${ARG_COMPILE_FLAGS}>) - target_compile_options(${GPU_MOD_NAME} PRIVATE - $<$<COMPILE_LANGUAGE:${GPU_LANGUAGE}>:${GPU_COMPILE_FLAGS}>) 
+ target_compile_definitions(${MOD_NAME} PRIVATE + "-DTORCH_EXTENSION_NAME=${MOD_NAME}") - target_compile_definitions(${GPU_MOD_NAME} PRIVATE - "-DTORCH_EXTENSION_NAME=${GPU_MOD_NAME}") - - - target_link_libraries(${GPU_MOD_NAME} PRIVATE torch ${GPU_LIBRARIES}) + target_link_libraries(${MOD_NAME} PRIVATE torch ${ARG_LIBRARIES}) # Don't use `TORCH_LIBRARIES` for CUDA since it pulls in a bunch of # dependencies that are not necessary and may not be installed. - if (GPU_LANGUAGE STREQUAL "CUDA") - target_link_libraries(${GPU_MOD_NAME} PRIVATE CUDA::cudart CUDA::cuda_driver) + if (ARG_LANGUAGE STREQUAL "CUDA") + target_link_libraries(${MOD_NAME} PRIVATE torch CUDA::cudart CUDA::cuda_driver ${ARG_LIBRARIES}) else() - target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES}) + target_link_libraries(${MOD_NAME} PRIVATE torch ${TORCH_LIBRARIES} ${ARG_LIBRARIES}) endif() - install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION} COMPONENT ${GPU_MOD_NAME}) + install(TARGETS ${MOD_NAME} LIBRARY DESTINATION ${ARG_DESTINATION} COMPONENT ${MOD_NAME}) endfunction()