Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2025-12-24 16:25:02 +08:00
Update note comment for flashinfer attention warmup (#30711)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent 009a773828
commit d4d2751732
@@ -49,13 +49,12 @@ def kernel_warmup(worker: "Worker"):
    except NotImplementedError:
        return False

    # NOTE: we add check for empty attn_groups to avoid errors when
    # deploying models such as E instances and encoder-only models.
    # As for those models, worker.model_runner.attn_groups is empty.
    # This change is made during EPD feature development.
    if (
        not worker.model_runner.is_pooling_model
        and worker.model_runner.attn_groups
        # NOTE: This should be `any` instead of `all` but other hybrid attention
        # backends don't support this dummy run. Once we remove
        # `build_for_cudagraph_capture`, we can change it to `any`.
        and all(
            _is_flashinfer_backend(group.backend)
            for groups in worker.model_runner.attn_groups
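For context, a minimal runnable sketch of the guard pattern in this hunk. `AttnGroup` and `should_warmup` are hypothetical names invented for illustration; `_is_flashinfer_backend` and the nested `attn_groups` structure (a list of group lists, each group exposing a `backend` attribute) are simplified stand-ins for the code above, not vLLM's actual implementation.

from dataclasses import dataclass


@dataclass
class AttnGroup:
    # Hypothetical stand-in for a vLLM attention group; only the
    # backend name matters for this sketch.
    backend: str = "FLASHINFER"


def _is_flashinfer_backend(backend: str) -> bool:
    # Simplified predicate standing in for the helper used in the hunk.
    return backend == "FLASHINFER"


def should_warmup(is_pooling_model: bool,
                  attn_groups: list[list[AttnGroup]]) -> bool:
    # Mirrors the guard: skip pooling models, skip when attn_groups is
    # empty (e.g. EPD "E" instances and encoder-only models), and require
    # that every group uses the FlashInfer backend ("all" rather than
    # "any", per the NOTE in the diff).
    return (
        not is_pooling_model
        and bool(attn_groups)
        and all(
            _is_flashinfer_backend(group.backend)
            for groups in attn_groups
            for group in groups
        )
    )


# Empty attn_groups short-circuits to False before the comprehension
# runs, which is exactly the error case the first NOTE guards against.
assert should_warmup(False, []) is False
assert should_warmup(False, [[AttnGroup()]]) is True
assert should_warmup(True, [[AttnGroup()]]) is False

Using `all` matches the NOTE above: warmup runs only when every group's backend supports the dummy run, and an empty attn_groups falls out of the check before the comprehension ever iterates.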