diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 37f9a4b383ce9..a8e796a1eab63 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -310,7 +310,8 @@ class Attention(nn.Module, AttentionLayerBase):
             kv_sharing_target_layer_name,
             **extra_impl_args,
         )
-        self.backend = AttentionBackendEnum[self.attn_backend.get_name()]
+        backend_name = self.attn_backend.get_name()
+        self.backend = AttentionBackendEnum.__members__.get(backend_name)
         self.dtype = dtype
 
         # For cuda-alike (CUDA and ROCM) and cpu platforms, we control how