[ROCm][AMD][Bugfix] adding a missing triton autotune config (#4845)

This commit is contained in:
Hongxia Yang 2024-05-16 13:46:52 -04:00 committed by GitHub
parent f09edd8a25
commit b5853f9963
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -239,6 +239,16 @@ def _attn_fwd_inner(
num_stages=1,
num_warps=8,
),
triton.Config(
{
"BLOCK_M": 128,
"BLOCK_N": 64,
"waves_per_eu": 1,
"PRE_LOAD_V": False,
},
num_stages=1,
num_warps=4,
),
triton.Config(
{
"BLOCK_M": 128,