Merge 4ab91bcc015efb6ad9edf8d80133d11208b3dddf into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

This commit is contained in:
Steve Westerhouse 2025-12-25 08:06:45 +08:00 committed by GitHub
commit e6bca90aac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -294,8 +294,13 @@ class RocmPlatform(Platform):
attn_selector_config.attn_type is not None
and attn_selector_config.attn_type == AttentionType.ENCODER_ONLY
):
logger.info("Using FlexAttention backend.")
return AttentionBackendEnum.FLEX_ATTENTION.get_path()
# Use generic FlashAttention for encoder-only models
# ROCM_AITER_FA doesn't support encoder-only (causal-only limitation)
# Generic FLASH_ATTN supports all attention types including ENCODER_ONLY
logger.info(
"Using FlashAttention backend for encoder-only model on ROCm."
)
return AttentionBackendEnum.FLASH_ATTN.get_path()
# Default: Triton Unified Attention
logger.info("Using Triton Attention backend.")