mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-19 04:15:01 +08:00
[Bugfix] Initialize attention bias on the same device as Query/Key/Value for QwenVL Series (#14031)
This commit is contained in:
parent
f7bee5c815
commit
9b61dd41e7
@@ -323,7 +323,8 @@ class Qwen2_5_VisionAttention(nn.Module):
 
             seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
             attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
-                                                       kv_seqlen=None)
+                                                       kv_seqlen=None,
+                                                       device=q.device)
 
             context_layer = xops.memory_efficient_attention_forward(
                 q, k, v, attn_bias=attn_bias, p=0, scale=None)
@@ -367,7 +367,8 @@ class Qwen2VisionAttention(nn.Module):
 
             seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
             attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
-                                                       kv_seqlen=None)
+                                                       kv_seqlen=None,
+                                                       device=q.device)
 
             context_layer = xops.memory_efficient_attention_forward(
                 q, k, v, attn_bias=attn_bias, p=0, scale=None)
Loading…
x
Reference in New Issue
Block a user