diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0129f85123fb9..c13961b55dc54 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,6 +171,13 @@ if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA")
   list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}")
 endif()
 
+#
+# Set nvcc fatbin compression (CUDA builds with nvcc 12.8 or newer only).
+# The version is quoted so an empty value cannot break the if() parse.
+#
+if(VLLM_GPU_LANG STREQUAL "CUDA" AND "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.8)
+  list(APPEND VLLM_GPU_FLAGS "-Xfatbin" "-compress-all" "-compress-mode=size")
+endif()
 
 #
 # Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.