diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index ab9bfe4d0f191..11d6a0c8dc192 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -720,7 +720,7 @@ class Qwen2VisionTransformer(nn.Module):
         rotary_pos_emb = self.rot_pos_emb(grid_thw)
 
         # compute cu_seqlens
-        grid_thw_ = torch.tensor(grid_thw)
+        grid_thw_ = torch.tensor(grid_thw, device=x.device, dtype=torch.long)
         cu_seqlens = torch.repeat_interleave(grid_thw_[:, 1] * grid_thw_[:, 2],
                                              grid_thw_[:, 0]).cumsum(
                                                  dim=0, dtype=torch.int32)