fix(rocm): add early return in get_flash_attn_version for ROCm

Prevents spurious "libcudart.so.12 not found" errors by skipping the CUDA-specific vllm_flash_attn import on the ROCm platform.

Signed-off-by: rabi <ramishra@redhat.com>
2026-07-04 04:57:09 +08:00 · 2025-12-24 19:23:14 +05:30 · 2025-12-24 19:23:14 +05:30 · 3b1a3cae0e
commit 3b1a3cae0e
parent 7adeb4bfa8
1 changed files with 1 additions and 1 deletions
--- a/vllm/attention/utils/fa_utils.py
+++ b/vllm/attention/utils/fa_utils.py
@@ -31,7 +31,7 @@ def get_flash_attn_version(requires_alibi: bool = False) -> int | None:
    # import here to avoid circular dependencies
    from vllm.platforms import current_platform

-    if current_platform.is_xpu():
+    if current_platform.is_xpu() or current_platform.is_rocm():
        return 2
    try:
        from vllm.vllm_flash_attn.flash_attn_interface import (