From 3f1b03739ae1422361446d3d23bed970bd549ebc Mon Sep 17 00:00:00 2001
From: TJian
Date: Thu, 4 Dec 2025 16:20:24 +0800
Subject: [PATCH] [ROCm] [Bugfix] `compute_attn_mask_seqlen` for qwen3 omni
 (#29974)

Signed-off-by: tjtanaa
---
 vllm/model_executor/models/qwen3_omni_moe_thinker.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/qwen3_omni_moe_thinker.py b/vllm/model_executor/models/qwen3_omni_moe_thinker.py
index fe825198dcaa4..e6979211b707f 100755
--- a/vllm/model_executor/models/qwen3_omni_moe_thinker.py
+++ b/vllm/model_executor/models/qwen3_omni_moe_thinker.py
@@ -494,7 +494,10 @@ class Qwen3Omni_VisionTransformer(nn.Module):
         cu_seqlens: torch.Tensor,
     ) -> torch.Tensor:
         max_seqlen = torch.zeros([], device=cu_seqlens.device)
-        if self.attn_backend == AttentionBackendEnum.FLASH_ATTN:
+        if self.attn_backend in {
+            AttentionBackendEnum.FLASH_ATTN,
+            AttentionBackendEnum.ROCM_AITER_FA,
+        }:
             max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max()
         return max_seqlen
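
For context, a minimal standalone sketch of what the patched helper computes. This is not part of the patch: the function name `compute_max_seqlen` and the boolean `backend_is_fa` (standing in for the `self.attn_backend in {...}` membership check against `AttentionBackendEnum`) are simplifications for illustration, and the `cu_seqlens` values are made up.

```python
import torch

def compute_max_seqlen(cu_seqlens: torch.Tensor, backend_is_fa: bool) -> torch.Tensor:
    # Default: a scalar zero tensor on the same device as cu_seqlens.
    max_seqlen = torch.zeros([], device=cu_seqlens.device)
    if backend_is_fa:
        # cu_seqlens holds cumulative sequence boundaries for packed
        # (varlen) batches; the per-sequence lengths are the successive
        # differences, and flash-attention-style varlen kernels need
        # the maximum of those lengths.
        max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max()
    return max_seqlen

# cu_seqlens [0, 3, 7, 12] encodes three packed sequences of lengths 3, 4, 5.
cu = torch.tensor([0, 3, 7, 12])
print(compute_max_seqlen(cu, backend_is_fa=True))   # tensor(5)
print(compute_max_seqlen(cu, backend_is_fa=False))  # tensor(0.)
```

The fix itself only widens the backend check: before the patch, only `FLASH_ATTN` took the max-length branch, so on ROCm with the `ROCM_AITER_FA` backend `max_seqlen` stayed zero; the membership test makes both backends compute it.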