[Hardware][AMD] Enable FlexAttention backend on ROCm (#26439)

Signed-off-by: Matthew Wong <Matthew.Wong2@amd.com>
This commit is contained in:
Matt 2025-10-09 01:20:18 -05:00 committed by GitHub
parent 8bd696fa53
commit de253d63b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -276,6 +276,9 @@ class RocmPlatform(Platform):
)
if envs.VLLM_USE_V1:
if selected_backend == _Backend.FLEX_ATTENTION:
logger.info("Using FlexAttention backend on V1 engine.")
return "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend"
if (
envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_MHA and on_gfx9()
) or selected_backend == _Backend.ROCM_AITER_FA: