From 60e089f0b90b1fe9b65224b069c953927d1f3b44 Mon Sep 17 00:00:00 2001 From: Xiake Sun Date: Mon, 17 Nov 2025 12:52:11 +0800 Subject: [PATCH] [ROCm][Qwen3-32B] Fix AITER MHA accuracy issue cause by #25763 (#28670) Signed-off-by: Xiake Sun --- vllm/v1/attention/backends/rocm_aiter_fa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py index ad454daa582eb..ea611848b0e81 100644 --- a/vllm/v1/attention/backends/rocm_aiter_fa.py +++ b/vllm/v1/attention/backends/rocm_aiter_fa.py @@ -729,7 +729,7 @@ class AiterFlashAttentionImpl(AttentionImpl): cu_seqlens_k=attn_metadata.prefill_metadata.query_start_loc, max_seqlen_q=attn_metadata.prefill_metadata.max_query_len, max_seqlen_k=attn_metadata.prefill_metadata.max_seq_len, - min_seqlen_q=attn_metadata.prefill_metadata.min_query_len, + min_seqlen_q=1, dropout_p=0.0, softmax_scale=self.scale, causal=True, @@ -759,7 +759,7 @@ class AiterFlashAttentionImpl(AttentionImpl): cu_seqlens_q=attn_metadata.extend_metadata.query_start_loc, max_seqlen_q=attn_metadata.extend_metadata.max_query_len, max_seqlen_k=attn_metadata.extend_metadata.max_seq_len, - min_seqlen_q=attn_metadata.extend_metadata.min_query_len, + min_seqlen_q=1, block_table=attn_metadata.block_table[ num_decodes : num_decodes + num_extends ],