From c312320764193e7d0ffa99d247c61efe5458a635 Mon Sep 17 00:00:00 2001
From: Huamin Li <3ericli@gmail.com>
Date: Fri, 17 Oct 2025 21:11:26 -0700
Subject: [PATCH] =?UTF-8?q?[CI/Build]=20tests(v1):=20feed=20Triton=20atten?=
 =?UTF-8?q?tion=20the=20(num=5Fblocks,=202,=20=E2=80=A6)=20KV=20cache=20la?=
 =?UTF-8?q?yout=20in=20backend-correctness=20tests=20(#26663)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Huamin Li <3ericli@gmail.com>
Co-authored-by: Ye (Charlotte) Qi <yeq@meta.com>
---
 tests/v1/attention/test_attention_backends.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/v1/attention/test_attention_backends.py b/tests/v1/attention/test_attention_backends.py
index 07706d4b956c..174642123d5a 100644
--- a/tests/v1/attention/test_attention_backends.py
+++ b/tests/v1/attention/test_attention_backends.py
@@ -423,13 +423,14 @@ def _test_backend_correctness(
     for backend_name in backend_to_test:
         # FlashAttentionm + FlexAttention:
         #   [2, num_blocks, block_size, num_kv_heads, head_size]
-        # FlashInfer:
+        # FlashInfer + Triton:
         #   [num_blocks, 2, block_size, num_kv_heads, head_size]
         # Select the appropriate KV cache format for each backend
         kv_cache_for_backend = kv_cache
-        if backend_name == _Backend.FLASHINFER:
+        if backend_name in (_Backend.FLASHINFER, _Backend.TRITON_ATTN):
             kv_cache_for_backend = kv_cache.transpose(0, 1)
 
+        if backend_name == _Backend.FLASHINFER:
             # For FlashInfer default to HND layout and
             kv_cache_for_backend = (
                 kv_cache_for_backend.transpose(2, 3).contiguous().transpose(2, 3)