From 3b1a3cae0eae533d854362758326e3b399ad9893 Mon Sep 17 00:00:00 2001
From: rabi
Date: Wed, 24 Dec 2025 19:23:14 +0530
Subject: [PATCH] fix(rocm): add early return in get_flash_attn_version for ROCm

Prevents spurious "libcudart.so.12 not found" errors by skipping the
CUDA-specific vllm_flash_attn import on the ROCm platform.

Signed-off-by: rabi
---
 vllm/attention/utils/fa_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/utils/fa_utils.py b/vllm/attention/utils/fa_utils.py
index e38c88f4838d1..1daa79762471f 100644
--- a/vllm/attention/utils/fa_utils.py
+++ b/vllm/attention/utils/fa_utils.py
@@ -31,7 +31,7 @@ def get_flash_attn_version(requires_alibi: bool = False) -> int | None:
     # import here to avoid circular dependencies
     from vllm.platforms import current_platform
 
-    if current_platform.is_xpu():
+    if current_platform.is_xpu() or current_platform.is_rocm():
         return 2
     try:
         from vllm.vllm_flash_attn.flash_attn_interface import (