Fix FA2 fallback for Blackwell V1 (#19781)

Signed-off-by: mgoin <mgoin64@gmail.com>
2025-12-15 09:15:55 +08:00 · 2025-06-19 10:53:55 +09:00 · 2025-06-19 10:53:55 +09:00 · 36239f79dd
commit 36239f79dd
parent dfada85eee
1 changed files with 1 additions and 1 deletions
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -255,7 +255,7 @@ class CudaPlatformBase(Platform):
                        "install FlashInfer for better performance.")
                    pass
            # FlashAttention is the default for SM 8.0+ GPUs
-            elif cls.has_device_capability(80):
+            if cls.has_device_capability(80):
                logger.info_once("Using Flash Attention backend on V1 engine.")
                return ("vllm.v1.attention.backends."
                        "flash_attn.FlashAttentionBackend")