mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-12 04:07:04 +08:00
cleanup
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
parent
b478b18f07
commit
4e00778a60
@ -321,8 +321,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
"csrc/quantization/gptq_marlin/gptq_marlin.cu"
|
||||
"csrc/quantization/gptq_marlin/gptq_marlin_repack.cu"
|
||||
"csrc/quantization/gptq_marlin/awq_marlin_repack.cu"
|
||||
NO_ARCH_MSG
|
||||
"Not building Marlin kernels as no compatible archs found in CUDA target architectures"
|
||||
)
|
||||
|
||||
# AllSpark kernels
|
||||
@ -332,7 +330,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
SRCS
|
||||
"csrc/quantization/gptq_allspark/allspark_repack.cu"
|
||||
"csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu"
|
||||
NO_ARCH_MSG "Not building AllSpark kernels as no compatible archs found in CUDA target architectures"
|
||||
)
|
||||
|
||||
|
||||
@ -411,7 +408,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
"csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu"
|
||||
"csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu"
|
||||
FLAGS "-DENABLE_NVFP4=1"
|
||||
NO_ARCH_MSG "Not building NVFP4 as no compatible archs were found."
|
||||
)
|
||||
|
||||
# CUTLASS MLA Archs and flags
|
||||
@ -421,7 +417,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
ARCHS "10.0a"
|
||||
SRCS "csrc/attention/mla/cutlass_mla_kernels.cu"
|
||||
FLAGS "-DENABLE_CUTLASS_MLA=1"
|
||||
NO_ARCH_MSG "Not building CUTLASS MLA as no compatible archs were found."
|
||||
)
|
||||
# Add MLA-specific include directories only to MLA source files
|
||||
set_source_files_properties(
|
||||
@ -456,8 +451,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
VERSION_MSG
|
||||
"Not building Machete kernels as CUDA Compiler version is less than 12.0."
|
||||
"We recommend upgrading to CUDA 12.0 or later to run w4a16 quantized models on Hopper."
|
||||
NO_ARCH_MSG
|
||||
"Not building Machete kernels as no compatible archs found in CUDA target architectures"
|
||||
)
|
||||
# if CUDA endif
|
||||
endif()
|
||||
@ -503,7 +496,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
||||
GEN_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/csrc/moe/marlin_moe_wna16/generate_kernels.py"
|
||||
GEN_GLOB "csrc/moe/marlin_moe_wna16/*.cu"
|
||||
SRCS "csrc/moe/moe_wna16.cu"
|
||||
NO_ARCH_MSG "Not building Marlin MOE kernels as no compatible archs found in CUDA target architectures"
|
||||
OUT_SRCS_VAR VLLM_MOE_EXT_SRC
|
||||
)
|
||||
endif()
|
||||
|
||||
@ -114,22 +114,22 @@ function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
|
||||
set(${OUT_SRCS} ${HIP_SRCS} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
## Macro to conditionally include CUDA sources based on architecture and CUDA
|
||||
## compiler version, optionally generating sources via a Python script.
|
||||
## Usage:
|
||||
## optional_cuda_sources(
|
||||
## NAME <name>
|
||||
## [MIN_VERSION <version>]
|
||||
## ARCHS <arch1;arch2;...>
|
||||
## SRCS <static_source1> [<static_source2> ...]
|
||||
## [FLAGS <flag1> ...]
|
||||
## [VERSION_MSG <line1> [<line2> ...]]
|
||||
## [NO_ARCH_MSG <line1> [<line2> ...]]
|
||||
## [GEN_SCRIPT <path/to/generate_script.py>]
|
||||
## [GEN_GLOB <glob_pattern_for_generated_sources>]
|
||||
## [APPEND_ARCHS <var_to_append_archs>]
|
||||
## This will run GEN_SCRIPT once when version and arch checks pass, globbing
|
||||
## sources matching GEN_GLOB and appending them alongside SRCS.
|
||||
# Macro to conditionally include CUDA sources based on architecture and CUDA
|
||||
# compiler version, optionally generating sources via a Python script.
|
||||
# Usage:
|
||||
# optional_cuda_sources(
|
||||
# NAME <name>
|
||||
# [MIN_VERSION <version>]
|
||||
# ARCHS <arch1;arch2;...>
|
||||
# SRCS <static_source1> [<static_source2> ...]
|
||||
# [FLAGS <flag1> ...]
|
||||
# [VERSION_MSG <line1> [<line2> ...]]
|
||||
# [NO_ARCH_MSG <line1> [<line2> ...]]
|
||||
# [GEN_SCRIPT <path/to/generate_script.py>]
|
||||
# [GEN_GLOB <glob_pattern_for_generated_sources>]
|
||||
# [APPEND_ARCHS <var_to_append_archs>]
# [OUT_SRCS_VAR <var_name>]
|
||||
# This will run GEN_SCRIPT once when version and arch checks pass, globbing
|
||||
# sources matching GEN_GLOB and appending them alongside SRCS.
|
||||
macro(optional_cuda_sources)
|
||||
set(oneValueArgs NAME MIN_VERSION APPEND_ARCHS GEN_SCRIPT GEN_GLOB OUT_SRCS_VAR)
|
||||
set(multiValueArgs ARCHS SRCS FLAGS VERSION_MSG NO_ARCH_MSG)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user