From d4d2751732c3ccae162a5a0160c7d4fe05d2779a Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Wed, 17 Dec 2025 00:29:03 -0500
Subject: [PATCH] Update note comment for flashinfer attention warmup (#30711)

Signed-off-by: mgoin
---
 vllm/model_executor/warmup/kernel_warmup.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/warmup/kernel_warmup.py b/vllm/model_executor/warmup/kernel_warmup.py
index 95f5982bc8c7b..98b28d3e5292f 100644
--- a/vllm/model_executor/warmup/kernel_warmup.py
+++ b/vllm/model_executor/warmup/kernel_warmup.py
@@ -49,13 +49,12 @@ def kernel_warmup(worker: "Worker"):
         except NotImplementedError:
             return False
 
-    # NOTE: we add check for empty attn_groups to avoid errors when
-    # deploying models such as E instances and encoder-only models.
-    # As for those models, worker.model_runner.attn_groups is empty.
-    # This change is made during EPD feature development.
     if (
         not worker.model_runner.is_pooling_model
         and worker.model_runner.attn_groups
+        # NOTE: This should be `any` instead of `all` but other hybrid attention
+        # backends don't support this dummy run. Once we remove
+        # `build_for_cudagraph_capture`, we can change it to `any`.
         and all(
             _is_flashinfer_backend(group.backend)
             for groups in worker.model_runner.attn_groups
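
--
For context beyond the hunk: the condition annotated above gates a FlashInfer
attention warmup pass. Below is a minimal standalone sketch of that guard,
with hypothetical stand-ins (a plain `AttnGroup` dataclass and string-valued
backends); the real vLLM `Worker` and attention-group objects carry far more
state, and the nested `for group in groups` continuation implied by
`group.backend` lies just past the hunk shown:

    from dataclasses import dataclass


    @dataclass
    class AttnGroup:
        # Hypothetical stand-in; the real object wraps an attention backend class.
        backend: str


    def _is_flashinfer_backend(backend: str) -> bool:
        # Stand-in for the real check, which inspects the backend class and,
        # per the hunk context, may raise NotImplementedError.
        return backend == "FLASHINFER"


    def should_warm_up_flashinfer(
        is_pooling_model: bool, attn_groups: list[list[AttnGroup]]
    ) -> bool:
        # Mirrors the patched condition: skip pooling models, skip when
        # attn_groups is empty (e.g. encoder-only models), and require that
        # *all* groups use FlashInfer. Per the new NOTE, `all` is the
        # conservative placeholder for `any` until hybrid-attention backends
        # can tolerate the dummy warmup run.
        return (
            not is_pooling_model
            and bool(attn_groups)
            and all(
                _is_flashinfer_backend(group.backend)
                for groups in attn_groups
                for group in groups  # continuation implied past the hunk
            )
        )


    # Homogeneous FlashInfer groups trigger warmup; mixed backends do not.
    assert should_warm_up_flashinfer(False, [[AttnGroup("FLASHINFER")]])
    assert not should_warm_up_flashinfer(
        False, [[AttnGroup("FLASHINFER"), AttnGroup("FLASH_ATTN")]]
    )

The patch only moves and rewrites the NOTE: keeping `all` rather than `any` is
the conservative choice it describes, since warmup then runs only when no
group could route the dummy run to a backend that cannot handle it.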