From 4c33d6732148fdaeb9780fa86fca1f87f2a93c19 Mon Sep 17 00:00:00 2001 From: Hui Liu <96135754+hliuca@users.noreply.github.com> Date: Fri, 2 May 2025 09:44:07 -0700 Subject: [PATCH] [Bugfix] fix tmp_out and exp_sums dimensions (#17438) Signed-off-by: Hui Liu <96135754+hliuca@users.noreply.github.com> --- vllm/attention/ops/chunked_prefill_paged_decode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/attention/ops/chunked_prefill_paged_decode.py b/vllm/attention/ops/chunked_prefill_paged_decode.py index 1b47581641b0..759b3d8536dd 100644 --- a/vllm/attention/ops/chunked_prefill_paged_decode.py +++ b/vllm/attention/ops/chunked_prefill_paged_decode.py @@ -289,7 +289,7 @@ def chunked_prefill_paged_decode( max_num_partitions = ((max_seq_len + _PARTITION_SIZE_ROCM - 1) // _PARTITION_SIZE_ROCM) assert _PARTITION_SIZE_ROCM % block_size == 0 - total_num_seq = query.shape[0] + total_num_seq = block_table.shape[0] tmp_output = torch.empty( size=(total_num_seq, num_query_heads, max_num_partitions, head_size),