Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2025-12-24 16:25:02 +08:00
Update note comment for flashinfer attention warmup (#30711)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent 009a773828
commit d4d2751732
@@ -49,13 +49,12 @@ def kernel_warmup(worker: "Worker"):
    except NotImplementedError:
        return False

    # NOTE: we add check for empty attn_groups to avoid errors when
    # deploying models such as E instances and encoder-only models.
    # As for those models, worker.model_runner.attn_groups is empty.
    # This change is made during EPD feature development.
    if (
        not worker.model_runner.is_pooling_model
        and worker.model_runner.attn_groups
        # NOTE: This should be `any` instead of `all` but other hybrid attention
        # backends don't support this dummy run. Once we remove
        # `build_for_cudagraph_capture`, we can change it to `any`.
        and all(
            _is_flashinfer_backend(group.backend)
            for groups in worker.model_runner.attn_groups
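For context, a minimal runnable sketch of the guard pattern in this hunk. `AttnGroup` and `should_warmup` are hypothetical names invented for illustration; `_is_flashinfer_backend` and the nested `attn_groups` structure (a list of group lists, each group exposing a `backend` attribute) are simplified stand-ins for the code above, not vLLM's actual implementation.

from dataclasses import dataclass


@dataclass
class AttnGroup:
    # Hypothetical stand-in for a vLLM attention group; only the
    # backend name matters for this sketch.
    backend: str = "FLASHINFER"


def _is_flashinfer_backend(backend: str) -> bool:
    # Simplified predicate standing in for the helper used in the hunk.
    return backend == "FLASHINFER"


def should_warmup(is_pooling_model: bool,
                  attn_groups: list[list[AttnGroup]]) -> bool:
    # Mirrors the guard: skip pooling models, skip when attn_groups is
    # empty (e.g. EPD "E" instances and encoder-only models), and require
    # that every group uses the FlashInfer backend ("all" rather than
    # "any", per the NOTE in the diff).
    return (
        not is_pooling_model
        and bool(attn_groups)
        and all(
            _is_flashinfer_backend(group.backend)
            for groups in attn_groups
            for group in groups
        )
    )


# Empty attn_groups short-circuits to False before the comprehension
# runs, which is exactly the error case the first NOTE guards against.
assert should_warmup(False, []) is False
assert should_warmup(False, [[AttnGroup()]]) is True
assert should_warmup(True, [[AttnGroup()]]) is False

Using `all` matches the NOTE above: warmup runs only when every group's backend supports the dummy run, and an empty attn_groups falls out of the check before the comprehension ever iterates.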