From d4d2751732c3ccae162a5a0160c7d4fe05d2779a Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Wed, 17 Dec 2025 00:29:03 -0500
Subject: [PATCH] Update note comment for flashinfer attention warmup (#30711)

Signed-off-by: mgoin
---
 vllm/model_executor/warmup/kernel_warmup.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/warmup/kernel_warmup.py b/vllm/model_executor/warmup/kernel_warmup.py
index 95f5982bc8c7b..98b28d3e5292f 100644
--- a/vllm/model_executor/warmup/kernel_warmup.py
+++ b/vllm/model_executor/warmup/kernel_warmup.py
@@ -49,13 +49,12 @@ def kernel_warmup(worker: "Worker"):
         except NotImplementedError:
             return False
 
-    # NOTE: we add check for empty attn_groups to avoid errors when
-    # deploying models such as E instances and encoder-only models.
-    # As for those models, worker.model_runner.attn_groups is empty.
-    # This change is made during EPD feature development.
     if (
         not worker.model_runner.is_pooling_model
         and worker.model_runner.attn_groups
+        # NOTE: This should be `any` instead of `all` but other hybrid attention
+        # backends don't support this dummy run. Once we remove
+        # `build_for_cudagraph_capture`, we can change it to `any`.
         and all(
             _is_flashinfer_backend(group.backend)
             for groups in worker.model_runner.attn_groups
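
--
For context beyond the hunk: the condition annotated above gates a FlashInfer
attention warmup pass. Below is a minimal standalone sketch of that guard,
with hypothetical stand-ins (a plain `AttnGroup` dataclass and string-valued
backends); the real vLLM `Worker` and attention-group objects carry far more
state, and the nested `for group in groups` continuation implied by
`group.backend` lies just past the hunk shown:

    from dataclasses import dataclass


    @dataclass
    class AttnGroup:
        # Hypothetical stand-in; the real object wraps an attention backend class.
        backend: str


    def _is_flashinfer_backend(backend: str) -> bool:
        # Stand-in for the real check, which inspects the backend class and,
        # per the hunk context, may raise NotImplementedError.
        return backend == "FLASHINFER"


    def should_warm_up_flashinfer(
        is_pooling_model: bool, attn_groups: list[list[AttnGroup]]
    ) -> bool:
        # Mirrors the patched condition: skip pooling models, skip when
        # attn_groups is empty (e.g. encoder-only models), and require that
        # *all* groups use FlashInfer. Per the new NOTE, `all` is the
        # conservative placeholder for `any` until hybrid-attention backends
        # can tolerate the dummy warmup run.
        return (
            not is_pooling_model
            and bool(attn_groups)
            and all(
                _is_flashinfer_backend(group.backend)
                for groups in attn_groups
                for group in groups  # continuation implied past the hunk
            )
        )


    # Homogeneous FlashInfer groups trigger warmup; mixed backends do not.
    assert should_warm_up_flashinfer(False, [[AttnGroup("FLASHINFER")]])
    assert not should_warm_up_flashinfer(
        False, [[AttnGroup("FLASHINFER"), AttnGroup("FLASH_ATTN")]]
    )

The patch only moves and rewrites the NOTE: keeping `all` rather than `any` is
the conservative choice it describes, since warmup then runs only when no
group could route the dummy run to a backend that cannot handle it.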