From cc5befbced220b3bbb1a041a2d6aadd98472278d Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Mon, 28 Apr 2025 16:55:50 -0400
Subject: [PATCH] [BugFix] Fix cascade attention - RuntimeError: scheduler_metadata must have shape (metadata_size) (#17283)

Signed-off-by: Lucas Wilkinson
---
 vllm/v1/attention/backends/flash_attn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index 0c86ad8a828a6..41bb9aba29953 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -372,7 +372,7 @@ class FlashAttentionMetadataBuilder:
             suffix_kv_lens = torch.from_numpy(suffix_kv_lens).to(
                 self.runner.device)
             prefix_scheduler_metadata = schedule(
-                batch_size=num_reqs,
+                batch_size=1,
                 cu_query_lens=cu_prefix_query_lens,
                 max_query_len=num_actual_tokens,
                 seqlens=prefix_kv_lens,