[Neuron] trim attention kernel tests to fit trn1.2x instance (#14988)

Signed-off-by: Liangfu Chen <liangfc@amazon.com>
This commit is contained in:
Liangfu Chen 2025-03-18 00:05:52 -07:00 committed by GitHub
parent 5eeabc2a44
commit 53a0cf8b95
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -314,7 +314,7 @@ def get_active_block_tables(block_tables, query_lens, seq_lens, block_size,
# Test edge cases
(1, 128, 16, 1024, 4, 2, 16, False), # large decode batch
-(16, 4, 8, 8192, 48, 1, 128, True), # large prefill batch
+(16, 4, 8, 1024, 4, 2, 128, True), # large prefill batch
(4, 12, 32, 2048, 16, 1, 32, True), # multi-head attention (MHA)
(4, 12, 32, 2048, 16, 16, 32, True), # multi-query attention (MQA)
])