Merge 4ab91bcc015efb6ad9edf8d80133d11208b3dddf into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

2026-07-07 16:57:19 +08:00 · 2025-12-25 08:06:45 +08:00 · 2025-12-25 08:06:45 +08:00 · e6bca90aac
commit e6bca90aac
parent 254f6b9867 4ab91bcc01
1 changed file with 7 additions and 2 deletions
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -294,8 +294,13 @@ class RocmPlatform(Platform):
                attn_selector_config.attn_type is not None
                and attn_selector_config.attn_type == AttentionType.ENCODER_ONLY
            ):
-                logger.info("Using FlexAttention backend.")
-                return AttentionBackendEnum.FLEX_ATTENTION.get_path()
+                # Use generic FlashAttention for encoder-only models
+                # ROCM_AITER_FA doesn't support encoder-only (causal-only limitation)
+                # Generic FLASH_ATTN supports all attention types including ENCODER_ONLY
+                logger.info(
+                    "Using FlashAttention backend for encoder-only model on ROCm."
+                )
+                return AttentionBackendEnum.FLASH_ATTN.get_path()

            # Default: Triton Unified Attention
            logger.info("Using Triton Attention backend.")