From c312320764193e7d0ffa99d247c61efe5458a635 Mon Sep 17 00:00:00 2001 From: Huamin Li <3ericli@gmail.com> Date: Fri, 17 Oct 2025 21:11:26 -0700 Subject: [PATCH] =?UTF-8?q?[CI/Build]=20tests(v1):=20feed=20Triton=20atten?= =?UTF-8?q?tion=20the=20(num=5Fblocks,=202,=20=E2=80=A6)=20KV=20cache=20la?= =?UTF-8?q?yout=20in=20backend-correctness=20tests=20(#26663)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Huamin Li <3ericli@gmail.com> Co-authored-by: Ye (Charlotte) Qi --- tests/v1/attention/test_attention_backends.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/v1/attention/test_attention_backends.py b/tests/v1/attention/test_attention_backends.py index 07706d4b956c..174642123d5a 100644 --- a/tests/v1/attention/test_attention_backends.py +++ b/tests/v1/attention/test_attention_backends.py @@ -423,13 +423,14 @@ def _test_backend_correctness( for backend_name in backend_to_test: # FlashAttentionm + FlexAttention: # [2, num_blocks, block_size, num_kv_heads, head_size] - # FlashInfer: + # FlashInfer + Triton: # [num_blocks, 2, block_size, num_kv_heads, head_size] # Select the appropriate KV cache format for each backend kv_cache_for_backend = kv_cache - if backend_name == _Backend.FLASHINFER: + if backend_name in (_Backend.FLASHINFER, _Backend.TRITON_ATTN): kv_cache_for_backend = kv_cache.transpose(0, 1) + if backend_name == _Backend.FLASHINFER: # For FlashInfer default to HND layout and kv_cache_for_backend = ( kv_cache_for_backend.transpose(2, 3).contiguous().transpose(2, 3)