From 5c7963249daf0b57e803605079e8869e8b071247 Mon Sep 17 00:00:00 2001
From: Mengqing Cao
Date: Tue, 24 Dec 2024 20:39:36 +0800
Subject: [PATCH] [attn][tiny fix] fix attn backend in MultiHeadAttention (#11463)

Signed-off-by: Mengqing Cao
---
 vllm/attention/layer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 05d997279893b..69b6d1e4648df 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -191,6 +191,7 @@ class MultiHeadAttention(nn.Module):
                                         kv_cache_dtype=None,
                                         block_size=16,
                                         is_attention_free=False)
+        attn_backend = backend_name_to_enum(attn_backend.get_name())
         if attn_backend in {_Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1}:
             attn_backend = _Backend.XFORMERS
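
A minimal sketch of what the one-line change appears to accomplish: get_attn_backend returns a backend class, while the check that follows compares against _Backend enum members, so converting via backend_name_to_enum first lets the XFORMERS fallback actually match. The toy class and the simplified helpers below are hypothetical stand-ins, not vLLM's real implementations, and this sketch is not part of the patch.

# Illustrative sketch; _Backend, backend_name_to_enum, and the backend
# class below are simplified stand-ins for the vLLM identifiers of the
# same names.
import enum


class _Backend(enum.Enum):
    FLASH_ATTN = enum.auto()
    FLASH_ATTN_VLLM_V1 = enum.auto()
    XFORMERS = enum.auto()


class FlashAttentionBackend:
    """Stand-in for the backend class returned by get_attn_backend(...)."""

    @staticmethod
    def get_name() -> str:
        return "FLASH_ATTN"


def backend_name_to_enum(backend_name: str) -> _Backend:
    # Simplified mapping from a backend's reported name to the enum.
    return _Backend[backend_name]


attn_backend = FlashAttentionBackend  # what get_attn_backend(...) yields

# Without the added line: a backend *class* is compared against enum
# members, so this membership test is never true and the fallback is
# silently skipped.
assert attn_backend not in {_Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1}

# With the added line: convert to the enum first, then the check matches.
attn_backend = backend_name_to_enum(attn_backend.get_name())
if attn_backend in {_Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1}:
    attn_backend = _Backend.XFORMERS

assert attn_backend is _Backend.XFORMERS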