From 980a172474fa0f32433dda87ae1fa4aadba24c51 Mon Sep 17 00:00:00 2001
From: Percy
Date: Tue, 20 May 2025 13:19:34 -0500
Subject: [PATCH] [Kernel] update comment for KV shape in unified triton attn
 (#18099)

Signed-off-by: haochengxia
---
 vllm/attention/ops/triton_unified_attention.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/ops/triton_unified_attention.py b/vllm/attention/ops/triton_unified_attention.py
index 241e84ca669d..4bced779785a 100644
--- a/vllm/attention/ops/triton_unified_attention.py
+++ b/vllm/attention/ops/triton_unified_attention.py
@@ -31,8 +31,8 @@ def apply_softcap(S, x):
 def kernel_unified_attention_2d(
         output_ptr,  # [num_tokens, num_query_heads, head_size]
         query_ptr,  # [num_tokens, num_query_heads, head_size]
-        key_cache_ptr,  # [num_blks, num_kv_heads, head_size // x, blk_size, x]
-        value_cache_ptr,  # [num_blks, num_kv_heads, head_size, blk_size]
+        key_cache_ptr,  # [num_blks, blk_size, num_kv_heads, head_size]
+        value_cache_ptr,  # [num_blks, blk_size, num_kv_heads, head_size]
         block_tables_ptr,  # [num_seqs, max_num_blocks_per_seq]
         seq_lens_ptr,  # [num_seqs]
         alibi_slopes_ptr,  # [num_query_heads]
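
The sketch below (not part of the patch) illustrates the paged KV-cache layout the updated comments describe: key and value caches stored as [num_blks, blk_size, num_kv_heads, head_size], with a per-sequence block table mapping logical blocks to physical cache blocks. All names, sizes, and the gather logic are assumptions for illustration, not vLLM's actual implementation.

import torch

num_blks, blk_size = 16, 32          # number of cache blocks, tokens per block
num_kv_heads, head_size = 4, 128

# key/value caches laid out as [num_blks, blk_size, num_kv_heads, head_size],
# matching the updated comments on key_cache_ptr / value_cache_ptr
key_cache = torch.randn(num_blks, blk_size, num_kv_heads, head_size)
value_cache = torch.randn(num_blks, blk_size, num_kv_heads, head_size)

# one sequence occupying 3 logical blocks, mapped to physical blocks
# through a block table (hypothetical values)
block_table = torch.tensor([5, 2, 9])
seq_len = 70                         # tokens actually used by this sequence

# gather the sequence's keys: [3, blk_size, heads, dim] -> [seq_len, heads, dim]
keys = key_cache[block_table].reshape(-1, num_kv_heads, head_size)[:seq_len]
print(keys.shape)  # torch.Size([70, 4, 128])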