From 5c7963249daf0b57e803605079e8869e8b071247 Mon Sep 17 00:00:00 2001
From: Mengqing Cao
Date: Tue, 24 Dec 2024 20:39:36 +0800
Subject: [PATCH] [attn][tiny fix] fix attn backend in MultiHeadAttention (#11463)

Signed-off-by: Mengqing Cao
---
 vllm/attention/layer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 05d997279893b..69b6d1e4648df 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -191,6 +191,7 @@ class MultiHeadAttention(nn.Module):
                                         kv_cache_dtype=None,
                                         block_size=16,
                                         is_attention_free=False)
+        attn_backend = backend_name_to_enum(attn_backend.get_name())
         if attn_backend in {_Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1}:
             attn_backend = _Backend.XFORMERS
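
A minimal sketch of what the one-line change appears to accomplish: get_attn_backend returns a backend class, while the check that follows compares against _Backend enum members, so converting via backend_name_to_enum first lets the XFORMERS fallback actually match. The toy class and the simplified helpers below are hypothetical stand-ins, not vLLM's real implementations, and this sketch is not part of the patch.

# Illustrative sketch; _Backend, backend_name_to_enum, and the backend
# class below are simplified stand-ins for the vLLM identifiers of the
# same names.
import enum


class _Backend(enum.Enum):
    FLASH_ATTN = enum.auto()
    FLASH_ATTN_VLLM_V1 = enum.auto()
    XFORMERS = enum.auto()


class FlashAttentionBackend:
    """Stand-in for the backend class returned by get_attn_backend(...)."""

    @staticmethod
    def get_name() -> str:
        return "FLASH_ATTN"


def backend_name_to_enum(backend_name: str) -> _Backend:
    # Simplified mapping from a backend's reported name to the enum.
    return _Backend[backend_name]


attn_backend = FlashAttentionBackend  # what get_attn_backend(...) yields

# Without the added line: a backend *class* is compared against enum
# members, so this membership test is never true and the fallback is
# silently skipped.
assert attn_backend not in {_Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1}

# With the added line: convert to the enum first, then the check matches.
attn_backend = backend_name_to_enum(attn_backend.get_name())
if attn_backend in {_Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1}:
    attn_backend = _Backend.XFORMERS

assert attn_backend is _Backend.XFORMERS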