diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0129f85123fb9..c13961b55dc54 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,6 +171,20 @@ if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA")
   list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}")
 endif()
 
+#
+# Set nvcc fatbin compression.
+# The compiler version is referenced by name rather than as ${...}: an
+# unquoted expansion of an unset variable (presumably the case on non-CUDA
+# builds) would drop an operand and leave the if() expression malformed.
+# Each option is its own list element, like the --threads flag above.
+# NOTE(review): 12.8 is assumed to be the first nvcc accepting -compress-mode.
+#
+if(VLLM_GPU_LANG STREQUAL "CUDA" AND
+   DEFINED CMAKE_CUDA_COMPILER_VERSION AND
+   CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
+  list(APPEND VLLM_GPU_FLAGS "-Xfatbin" "-compress-all" "-compress-mode=size")
+endif()
+
 #
 # Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.