From b639327ad94b3aa16022ebea49f8e525660b736b Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Sat, 12 Jul 2025 15:07:35 +0900
Subject: [PATCH] Revert "Use NVCC --compress-mode to reduce binary size by 30% #20694" (#20853)

Signed-off-by: mgoin
---
 CMakeLists.txt | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 538f9adcb24ed..e59e912a99132 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,16 +171,6 @@ if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA")
   list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}")
 endif()
 
-#
-# Set nvcc fatbin compression.
-#
-if(VLLM_GPU_LANG STREQUAL "CUDA")
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8)
-    list(APPEND VLLM_GPU_FLAGS "-Xfatbin" "-compress-all" "-compress-mode=size")
-  endif()
-endif()
-
-
 #
 # Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.
 # setup.py will override FETCHCONTENT_BASE_DIR to play nicely with sccache.