From 9b61dd41e7f515c5c6a141a802cda86075bd9625 Mon Sep 17 00:00:00 2001
From: Yang Liu <651636074@qq.com>
Date: Fri, 28 Feb 2025 23:36:08 +0800
Subject: [PATCH] [Bugfix] Initialize attention bias on the same device as
 Query/Key/Value for QwenVL Series (#14031)

---
 vllm/model_executor/models/qwen2_5_vl.py | 3 ++-
 vllm/model_executor/models/qwen2_vl.py   | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 0dbff665b5d3b..ef3d28c8087d2 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -323,7 +323,8 @@ class Qwen2_5_VisionAttention(nn.Module):
 
         seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
         attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
-                                                   kv_seqlen=None)
+                                                   kv_seqlen=None,
+                                                   device=q.device)
 
         context_layer = xops.memory_efficient_attention_forward(
             q, k, v, attn_bias=attn_bias, p=0, scale=None)
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index cb92fcbe9fa1a..523b53d5ee41a 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -367,7 +367,8 @@ class Qwen2VisionAttention(nn.Module):
 
         seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
         attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
-                                                   kv_seqlen=None)
+                                                   kv_seqlen=None,
+                                                   device=q.device)
 
         context_layer = xops.memory_efficient_attention_forward(
             q, k, v, attn_bias=attn_bias, p=0, scale=None)
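
Note (not part of the patch): a minimal standalone sketch of the failure mode
this change addresses. Without device=q.device, BlockDiagonalMask.from_seqlens
materializes the mask on the default (CPU) device, which can mismatch
GPU-resident q/k/v at attention time. The sketch assumes a CUDA-capable setup
and an xformers version whose from_seqlens accepts a device argument (as the
patch relies on); the sequence lengths and tensor shapes are illustrative only.

    import torch
    import xformers.ops as xops
    from xformers.ops.fmha.attn_bias import BlockDiagonalMask

    device = torch.device("cuda")
    seqlens = [3, 5]  # two variable-length sequences packed along one batch row
    # [batch=1, total_tokens, heads, head_dim], as expected by xformers fmha
    q = torch.randn(1, sum(seqlens), 2, 8, device=device, dtype=torch.float16)
    k, v = torch.randn_like(q), torch.randn_like(q)

    # Building the block-diagonal bias directly on q's device keeps every
    # attention input co-located, avoiding a cross-device runtime error.
    attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
                                               kv_seqlen=None,
                                               device=q.device)
    out = xops.memory_efficient_attention_forward(
        q, k, v, attn_bias=attn_bias, p=0, scale=None)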