From dec197e3e5d14e1d4fbad61b565e151f52976c0f Mon Sep 17 00:00:00 2001
From: "Chendi.Xue"
Date: Fri, 27 Jun 2025 00:48:13 -0500
Subject: [PATCH] Quick Fix by adding conditional import for
 flash_attn_varlen_func in flash_attn (#20143)

Signed-off-by: Chendi.Xue
---
 vllm/attention/utils/fa_utils.py         |  4 ++++
 vllm/v1/attention/backends/flash_attn.py | 10 +++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/vllm/attention/utils/fa_utils.py b/vllm/attention/utils/fa_utils.py
index 36fd2d231bc5..f8b00565f051 100644
--- a/vllm/attention/utils/fa_utils.py
+++ b/vllm/attention/utils/fa_utils.py
@@ -66,3 +66,7 @@ def get_flash_attn_version(requires_alibi: bool = False) -> Optional[int]:
 def flash_attn_supports_fp8() -> bool:
     return get_flash_attn_version() == 3 and \
         current_platform.get_device_capability().major == 9
+
+
+def is_flash_attn_varlen_func_available() -> bool:
+    return current_platform.is_cuda() or current_platform.is_xpu()
diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index 42b5997f085b..527b31153410 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -14,10 +14,14 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
 from vllm.attention.layer import Attention
 from vllm.attention.ops.merge_attn_states import merge_attn_states
 from vllm.attention.utils.fa_utils import (flash_attn_supports_fp8,
-                                           flash_attn_varlen_func,
                                            get_flash_attn_version,
-                                           get_scheduler_metadata,
-                                           reshape_and_cache_flash)
+                                           is_flash_attn_varlen_func_available)
+
+if is_flash_attn_varlen_func_available():
+    from vllm.attention.utils.fa_utils import (flash_attn_varlen_func,
+                                               get_scheduler_metadata,
+                                               reshape_and_cache_flash)
+
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
 from vllm.utils import cdiv