From f48c6791b7bfc2579ad575d33ed83912f0bfb011 Mon Sep 17 00:00:00 2001
From: Zhuohan Li
Date: Fri, 8 Mar 2024 17:16:14 -0800
Subject: [PATCH] [FIX] Fix prefix test error on main (#3286)

---
 vllm/model_executor/layers/attention/backends/flash_attn.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm/model_executor/layers/attention/backends/flash_attn.py b/vllm/model_executor/layers/attention/backends/flash_attn.py
index 4abe195f274a7..58ccd461b993e 100644
--- a/vllm/model_executor/layers/attention/backends/flash_attn.py
+++ b/vllm/model_executor/layers/attention/backends/flash_attn.py
@@ -103,8 +103,6 @@ class FlashAttentionBackend:
                     key_cache,
                     value_cache,
                     input_metadata,
-                    self.num_heads,
-                    self.num_kv_heads,
                     self.alibi_slopes,
                 )
         else:
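
The hunk drops two positional arguments (`self.num_heads`, `self.num_kv_heads`) from the prefix-attention call so the call site lines up with the callee's updated parameter list. The callee itself is not shown in the hunk, so the sketch below is illustrative only: a hypothetical `forward_prefix` helper (not the actual vLLM API) that derives the head counts from tensor shapes instead of taking them as arguments, which is why passing them explicitly would misalign or break the call.

```python
import torch


def forward_prefix(query, key_cache, value_cache, alibi_slopes=None):
    """Hypothetical stand-in for the updated callee: head counts are
    derived from the tensor shapes rather than passed as arguments."""
    num_heads = query.shape[1]          # replaces the removed num_heads arg
    num_kv_heads = key_cache.shape[1]   # replaces the removed num_kv_heads arg
    assert num_heads % num_kv_heads == 0, "GQA needs divisible head counts"
    return query  # placeholder; the real kernel work would happen here


# Before the fix, the call site still passed num_heads/num_kv_heads
# positionally, shifting alibi_slopes into the wrong parameter slot
# (or raising a TypeError). After the fix, the arguments line up:
q = torch.randn(4, 8, 64)    # [num_tokens, num_heads, head_size]
kc = torch.randn(16, 2, 64)  # illustrative cache shapes, not vLLM's layout
vc = torch.randn(16, 2, 64)
out = forward_prefix(q, kc, vc, alibi_slopes=None)
```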