mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 11:26:16 +08:00
[Bugfix] Initialize attention bias on the same device as Query/Key/Value for QwenVL Series (#14031)
This commit is contained in:
parent
f7bee5c815
commit
9b61dd41e7
@@ -323,7 +323,8 @@ class Qwen2_5_VisionAttention(nn.Module):
|
||||
|
||||
seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
|
||||
 attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
-                                           kv_seqlen=None)
+                                           kv_seqlen=None,
+                                           device=q.device)
|
||||
|
||||
context_layer = xops.memory_efficient_attention_forward(
|
||||
q, k, v, attn_bias=attn_bias, p=0, scale=None)
|
||||
|
||||
@@ -367,7 +367,8 @@ class Qwen2VisionAttention(nn.Module):
|
||||
|
||||
seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
|
||||
 attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
-                                           kv_seqlen=None)
+                                           kv_seqlen=None,
+                                           device=q.device)
|
||||
|
||||
context_layer = xops.memory_efficient_attention_forward(
|
||||
q, k, v, attn_bias=attn_bias, p=0, scale=None)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user