Update note comment for flashinfer attention warmup (#30711)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin 2025-12-17 00:29:03 -05:00 committed by GitHub
parent 009a773828
commit d4d2751732
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -49,13 +49,12 @@ def kernel_warmup(worker: "Worker"):
except NotImplementedError:
return False
# NOTE: we add check for empty attn_groups to avoid errors when
# deploying models such as encode (E) instances and encoder-only models.
# For those models, worker.model_runner.attn_groups is empty.
# This change is made during EPD feature development.
if (
not worker.model_runner.is_pooling_model
and worker.model_runner.attn_groups
# NOTE: This should be `any` instead of `all` but other hybrid attention
# backends don't support this dummy run. Once we remove
# `build_for_cudagraph_capture`, we can change it to `any`.
and all(
_is_flashinfer_backend(group.backend)
for groups in worker.model_runner.attn_groups