From faae7a7eabcb50fd35d325f0c27d94f6232ec05c Mon Sep 17 00:00:00 2001 From: Alexander Matveev <59768536+alexm-redhat@users.noreply.github.com> Date: Tue, 23 Sep 2025 19:57:55 -0400 Subject: [PATCH] [Bugfix] [B200] cutlass_mla - ensure kv_split == 1 for batch size > 1 (#25509) Signed-off-by: Alexander Matveev Signed-off-by: yewentao256 --- csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp b/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp index fbbc2e588c326..297d94dcc0631 100644 --- a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp +++ b/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp @@ -135,10 +135,10 @@ public: max_splits = min(16, max_splits); // TODO: This avoids a hang when the batch size larger than 1 and - // there is more than 4 kv_splits. + // there is more than 1 kv_splits. // Discuss with NVIDIA how this can be fixed. if (B > 1) { - max_splits = min(2, max_splits); + max_splits = min(1, max_splits); } // printf(" max_splits = %d\n", max_splits);