From 2e1b8bc2b6d1796d65cea6ed514759d12a72cf17 Mon Sep 17 00:00:00 2001
From: Zhou Jiahao
Date: Tue, 30 Sep 2025 16:15:23 +0800
Subject: [PATCH] [Model][Bugfix] Fix MiDashengLM audio encoder mask by removing incorrect `logical_not` (#25925)

Signed-off-by: zhoukz
---
 vllm/model_executor/models/midashenglm.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/midashenglm.py b/vllm/model_executor/models/midashenglm.py
index 0b62fbd40b07..33bd64df5b53 100644
--- a/vllm/model_executor/models/midashenglm.py
+++ b/vllm/model_executor/models/midashenglm.py
@@ -426,8 +426,7 @@ class DashengAudioTransformer(nn.Module):
             assert x_length.ndim == 1, "Lengths are of size (B,)"
             scaled_lengths = (x_length / (self.hop_length * 4)).long()
             mask = self._to_mask(max_length=t, lengths=scaled_lengths)
-            split_masks = mask.logical_not().split(target_length_in_patches,
-                                                   dim=-1)
+            split_masks = mask.split(target_length_in_patches, dim=-1)
         else:
             mask = None
             split_masks = [None] * len(input_splits)