[Misc] Set default backend to SDPA for get_vit_attn_backend (#12235)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-12-21 15:05:46 +08:00 · 2025-01-22 03:52:11 +08:00 · 2025-01-22 03:52:11 +08:00 · fa9ee08121
commit fa9ee08121
parent 347eeebe3b
1 changed file with 16 additions and 14 deletions
--- a/vllm/model_executor/models/vision.py
+++ b/vllm/model_executor/models/vision.py
@@ -82,7 +82,7 @@ def get_vit_attn_backend(support_fa: bool = False) -> _Backend:
        if backend_by_env_var is not None:
            selected_backend = backend_name_to_enum(backend_by_env_var)
    if selected_backend is None:
-        # For Volta and Turing GPUs, use xformers instead.
+        if current_platform.is_cuda():
            device_available = current_platform.has_device_capability(80)
            if device_available and support_fa:
                from transformers.utils import is_flash_attn_2_available
@@ -90,15 +90,17 @@ def get_vit_attn_backend(support_fa: bool = False) -> _Backend:
                    selected_backend = _Backend.FLASH_ATTN
                else:
                    logger.warning_once(
-                    "Current `vllm-flash-attn` has a bug inside vision module, "
+                        "Current `vllm-flash-attn` has a bug inside vision "
-                    "so we use xformers backend instead. You can run "
+                        "module, so we use xformers backend instead. You can "
-                    "`pip install flash-attn` to use flash-attention backend.")
+                        "run `pip install flash-attn` to use flash-attention "
                        "backend.")
                    selected_backend = _Backend.XFORMERS
        elif current_platform.is_cpu() or current_platform.is_rocm():
            # ROCM doesn't support xformers
            selected_backend = _Backend.TORCH_SDPA
            else:
                # For Volta and Turing GPUs, use xformers instead.
                selected_backend = _Backend.XFORMERS
        else:
            # Default to torch SDPA for other non-GPU platforms.
            selected_backend = _Backend.TORCH_SDPA
    return selected_backend