Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
Lucas Wilkinson 2025-05-15 04:48:08 +00:00
parent b478b18f07
commit 4e00778a60
2 changed files with 16 additions and 24 deletions

View File

@ -321,8 +321,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
"csrc/quantization/gptq_marlin/gptq_marlin.cu"
"csrc/quantization/gptq_marlin/gptq_marlin_repack.cu"
"csrc/quantization/gptq_marlin/awq_marlin_repack.cu"
NO_ARCH_MSG
"Not building Marlin kernels as no compatible archs found in CUDA target architectures"
)
# AllSpark kernels
@ -332,7 +330,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
SRCS
"csrc/quantization/gptq_allspark/allspark_repack.cu"
"csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu"
NO_ARCH_MSG "Not building AllSpark kernels as no compatible archs found in CUDA target architectures"
)
@ -411,7 +408,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
"csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu"
"csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu"
FLAGS "-DENABLE_NVFP4=1"
NO_ARCH_MSG "Not building NVFP4 as no compatible archs were found."
)
# CUTLASS MLA Archs and flags
@ -421,7 +417,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
ARCHS "10.0a"
SRCS "csrc/attention/mla/cutlass_mla_kernels.cu"
FLAGS "-DENABLE_CUTLASS_MLA=1"
NO_ARCH_MSG "Not building CUTLASS MLA as no compatible archs were found."
)
# Add MLA-specific include directories only to MLA source files
set_source_files_properties(
@ -456,8 +451,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
VERSION_MSG
"Not building Machete kernels as CUDA Compiler version is less than 12.0."
"We recommend upgrading to CUDA 12.0 or later to run w4a16 quantized models on Hopper."
NO_ARCH_MSG
"Not building Machete kernels as no compatible archs found in CUDA target architectures"
)
# if CUDA endif
endif()
@ -503,7 +496,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
GEN_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/csrc/moe/marlin_moe_wna16/generate_kernels.py"
GEN_GLOB "csrc/moe/marlin_moe_wna16/*.cu"
SRCS "csrc/moe/moe_wna16.cu"
NO_ARCH_MSG "Not building Marlin MOE kernels as no compatible archs found in CUDA target architectures"
OUT_SRCS_VAR VLLM_MOE_EXT_SRC
)
endif()

View File

@ -114,22 +114,22 @@ function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
set(${OUT_SRCS} ${HIP_SRCS} PARENT_SCOPE)
endfunction()
## Macro to conditionally include CUDA sources based on architecture and CUDA
## compiler version, optionally generating sources via a Python script.
## Usage:
## optional_cuda_sources(
## NAME <name>
## [MIN_VERSION <version>]
## ARCHS <arch1;arch2;...>
## SRCS <static_source1> [<static_source2> ...]
## [FLAGS <flag1> ...]
## [VERSION_MSG <line1> [<line2> ...]]
## [NO_ARCH_MSG <line1> [<line2> ...]]
## [GEN_SCRIPT <path/to/generate_script.py>]
## [GEN_GLOB <glob_pattern_for_generated_sources>]
## [APPEND_ARCHS <var_to_append_archs>]
## This will run GEN_SCRIPT once when version and arch checks pass, globbing
## sources matching GEN_GLOB and appending them alongside SRCS.
# Macro to conditionally include CUDA sources based on architecture and CUDA
# compiler version, optionally generating sources via a Python script.
# Usage:
#   optional_cuda_sources(
#     NAME <name>
#     [MIN_VERSION <version>]
#     ARCHS <arch1;arch2;...>
#     SRCS <static_source1> [<static_source2> ...]
#     [FLAGS <flag1> ...]
#     [VERSION_MSG <line1> [<line2> ...]]
#     [NO_ARCH_MSG <line1> [<line2> ...]]
#     [GEN_SCRIPT <path/to/generate_script.py>]
#     [GEN_GLOB <glob_pattern_for_generated_sources>]
#     [APPEND_ARCHS <var_to_append_archs>]
#     [OUT_SRCS_VAR <var>]  # accepted as a one-value keyword; presumably names
#                           # the variable that receives the selected sources
#                           # -- TODO confirm against the macro body
#   )
# This will run GEN_SCRIPT once when version and arch checks pass, globbing
# sources matching GEN_GLOB and appending them alongside SRCS.
macro(optional_cuda_sources)
set(oneValueArgs NAME MIN_VERSION APPEND_ARCHS GEN_SCRIPT GEN_GLOB OUT_SRCS_VAR)
set(multiValueArgs ARCHS SRCS FLAGS VERSION_MSG NO_ARCH_MSG)