From 53a0cf8b95beb0eaf1b374146b0f33e216fe82d0 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 18 Mar 2025 00:05:52 -0700 Subject: [PATCH] [Neuron] trim attention kernel tests to fit trn1.2x instance (#14988) Signed-off-by: Liangfu Chen --- tests/neuron/1_core/test_prefix_prefill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/neuron/1_core/test_prefix_prefill.py b/tests/neuron/1_core/test_prefix_prefill.py index 37d6679f8d55b..5a811f6defe6c 100644 --- a/tests/neuron/1_core/test_prefix_prefill.py +++ b/tests/neuron/1_core/test_prefix_prefill.py @@ -314,7 +314,7 @@ def get_active_block_tables(block_tables, query_lens, seq_lens, block_size, # Test edge cases (1, 128, 16, 1024, 4, 2, 16, False), # large decode batch - (16, 4, 8, 8192, 48, 1, 128, True), # large prefill batch + (16, 4, 8, 1024, 4, 2, 128, True), # large prefill batch (4, 12, 32, 2048, 16, 1, 32, True), # multi-head attention (MHA) (4, 12, 32, 2048, 16, 16, 32, True), # multi-query attention (MQA) ])