From 69c9a01538d1e203be7a0291d1d19b2144e78bb0 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk@thinkingmachines.ai>
Date: Thu, 16 Oct 2025 16:49:29 +0000
Subject: [PATCH] disable flashinfer warmup

---
 vllm/model_executor/warmup/kernel_warmup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/warmup/kernel_warmup.py b/vllm/model_executor/warmup/kernel_warmup.py
index 28792338f036f..09972e6b11660 100644
--- a/vllm/model_executor/warmup/kernel_warmup.py
+++ b/vllm/model_executor/warmup/kernel_warmup.py
@@ -36,9 +36,9 @@ def kernel_warmup(worker: "Worker"):
         max_tokens = worker.scheduler_config.max_num_batched_tokens
         deep_gemm_warmup(model, max_tokens)
 
-    # FlashInfer autotune for Hopper (SM 9.0) and Blackwell (SM 10.0) GPUs
-    if has_flashinfer() and current_platform.has_device_capability(90):
-        flashinfer_autotune(worker.model_runner)
+    # # FlashInfer autotune for Hopper (SM 9.0) and Blackwell (SM 10.0) GPUs
+    # if has_flashinfer() and current_platform.has_device_capability(90):
+    #     flashinfer_autotune(worker.model_runner)
 
     # FlashInfer attention warmup
     # Only warmup if the model has FlashInfer attention groups