mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-09 05:15:39 +08:00
Update note comment for flashinfer attention warmup (#30711)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
009a773828
commit
d4d2751732
@ -49,13 +49,12 @@ def kernel_warmup(worker: "Worker"):
|
|||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# NOTE: we add check for empty attn_groups to avoid errors when
|
|
||||||
# deploying models such as E instances and encoder-only models.
|
|
||||||
# As for those models, worker.model_runner.attn_groups is empty.
|
|
||||||
# This change is made during EPD feature development.
|
|
||||||
if (
|
if (
|
||||||
not worker.model_runner.is_pooling_model
|
not worker.model_runner.is_pooling_model
|
||||||
and worker.model_runner.attn_groups
|
and worker.model_runner.attn_groups
|
||||||
|
# NOTE: This should be `any` instead of `all` but other hybrid attention
|
||||||
|
# backends don't support this dummy run. Once we remove
|
||||||
|
# `build_for_cudagraph_capture`, we can change it to `any`.
|
||||||
and all(
|
and all(
|
||||||
_is_flashinfer_backend(group.backend)
|
_is_flashinfer_backend(group.backend)
|
||||||
for groups in worker.model_runner.attn_groups
|
for groups in worker.model_runner.attn_groups
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user