diff --git a/vllm/model_executor/models/dots_ocr.py b/vllm/model_executor/models/dots_ocr.py
index 1bc50f27269e..d1a9f4cb3b2e 100644
--- a/vllm/model_executor/models/dots_ocr.py
+++ b/vllm/model_executor/models/dots_ocr.py
@@ -680,7 +680,7 @@ class DotsVisionTransformer(nn.Module):
             dim=0,
             dtype=grid_thw.dtype if torch.jit.is_tracing() else torch.int32,
         )
-        cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
+        cu_seqlens = torch.cat([cu_seqlens.new_zeros(1), cu_seqlens])
         max_seqlen, seqlens = self.compute_attn_mask_seqlen(cu_seqlens)
 
         for blk in self.blocks:
diff --git a/vllm/model_executor/models/ernie45_vl.py b/vllm/model_executor/models/ernie45_vl.py
index 493260cf73ef..2579a0ebf53e 100644
--- a/vllm/model_executor/models/ernie45_vl.py
+++ b/vllm/model_executor/models/ernie45_vl.py
@@ -574,11 +574,12 @@ class Ernie4_5_VisionTransformer(nn.Module):
             grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]
         ).cumsum(dim=0, dtype=torch.int32)
 
+        zeros = cu_seqlens.new_zeros(1)
         if num_pad > 0:
-            cu_seqlens = F.pad(cu_seqlens, (1, 1), value=0)
+            cu_seqlens = torch.cat([zeros, cu_seqlens, zeros])
             cu_seqlens[-1] = cu_seqlens[-2] + num_pad
         else:
-            cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
+            cu_seqlens = torch.cat([zeros, cu_seqlens])
 
         # add batch size
         if hidden_states.ndim == 2:
diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py
index 12cb979f0900..f7ba06d97f01 100644
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -539,7 +539,7 @@ class Qwen3_VisionTransformer(nn.Module):
             dim=0,
             dtype=grid_thw_tensor.dtype if torch.jit.is_tracing() else torch.int32,
         )
-        cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
+        cu_seqlens = torch.cat([cu_seqlens.new_zeros(1), cu_seqlens])
 
         hidden_states = hidden_states.unsqueeze(1)
         rotary_pos_emb = rotary_pos_emb.to(hidden_states.device)
diff --git a/vllm/model_executor/models/siglip2navit.py b/vllm/model_executor/models/siglip2navit.py
index 7cd133d9da1d..81f7e9887ace 100644
--- a/vllm/model_executor/models/siglip2navit.py
+++ b/vllm/model_executor/models/siglip2navit.py
@@ -592,7 +592,7 @@ class Siglip2Encoder(nn.Module):
                 # for more information
                 dtype=grid_thws.dtype if torch.jit.is_tracing() else torch.int32,
             )
-            cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
+            cu_seqlens = torch.cat([cu_seqlens.new_zeros(1), cu_seqlens])
 
             reverse_indices = torch.argsort(window_index)
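
For reference, a minimal standalone check (not part of the diff; the cu_seqlens values below are invented for illustration) showing that the new torch.cat form matches the F.pad calls it replaces in values, dtype, and device, since new_zeros inherits both from cu_seqlens:

    import torch
    import torch.nn.functional as F

    # Hypothetical cu_seqlens; in the models above it comes from a cumsum
    # over per-image patch counts.
    cu_seqlens = torch.tensor([3, 7, 12], dtype=torch.int32)
    zeros = cu_seqlens.new_zeros(1)  # one zero with the same dtype/device

    # Prepend a single zero (dots_ocr, qwen3_vl, siglip2navit, and the
    # ernie45_vl else-branch).
    assert torch.equal(torch.cat([zeros, cu_seqlens]),
                       F.pad(cu_seqlens, (1, 0), value=0))

    # Prepend and append a zero (ernie45_vl branch with num_pad > 0); the
    # trailing zero is then overwritten with cu_seqlens[-2] + num_pad.
    assert torch.equal(torch.cat([zeros, cu_seqlens, zeros]),
                       F.pad(cu_seqlens, (1, 1), value=0))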