mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 18:05:01 +08:00
[Neuron] trim attention kernel tests to fit trn1.2x instance (#14988)
Signed-off-by: Liangfu Chen <liangfc@amazon.com>
This commit is contained in:
parent
5eeabc2a44
commit
53a0cf8b95
@@ -314,7 +314,7 @@ def get_active_block_tables(block_tables, query_lens, seq_lens, block_size,
|
||||
|
||||
# Test edge cases
|
||||
(1, 128, 16, 1024, 4, 2, 16, False), # large decode batch
|
||||
- (16, 4, 8, 8192, 48, 1, 128, True), # large prefill batch
|
||||
+ (16, 4, 8, 1024, 4, 2, 128, True), # large prefill batch
|
||||
(4, 12, 32, 2048, 16, 1, 32, True), # multi-head attention (MHA)
|
||||
(4, 12, 32, 2048, 16, 16, 32, True), # multi-query attention (MQA)
|
||||
])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user