mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 06:55:01 +08:00
[Bugfix][Model] Fix Mllama SDPA illegal memory access for batched multi-image (#9626)
Signed-off-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
parent
b548d7a5f4
commit
bb01f2915e
@@ -795,17 +795,19 @@ class MllamaTextCrossAttention(nn.Module):
|
|||||||
kv_len = k.shape[0]
|
kv_len = k.shape[0]
|
||||||
q = q.transpose(0, 1).view(self.num_local_key_value_heads,
|
q = q.transpose(0, 1).view(self.num_local_key_value_heads,
|
||||||
self.num_key_value_groups, q_len,
|
self.num_key_value_groups, q_len,
|
||||||
self.head_dim)
|
self.head_dim).contiguous()
|
||||||
k = k.transpose(0,
|
k = k.transpose(0,
|
||||||
1)[:,
|
1)[:,
|
||||||
None, :, :].expand(self.num_local_key_value_heads,
|
None, :, :].expand(self.num_local_key_value_heads,
|
||||||
self.num_key_value_groups,
|
self.num_key_value_groups,
|
||||||
kv_len, self.head_dim)
|
kv_len,
|
||||||
|
self.head_dim).contiguous()
|
||||||
v = v.transpose(0,
|
v = v.transpose(0,
|
||||||
1)[:,
|
1)[:,
|
||||||
None, :, :].expand(self.num_local_key_value_heads,
|
None, :, :].expand(self.num_local_key_value_heads,
|
||||||
self.num_key_value_groups,
|
self.num_key_value_groups,
|
||||||
kv_len, self.head_dim)
|
kv_len,
|
||||||
|
self.head_dim).contiguous()
|
||||||
attention_mask = attention_mask.view(1, 1, q_len, kv_len)
|
attention_mask = attention_mask.view(1, 1, q_len, kv_len)
|
||||||
output = F.scaled_dot_product_attention(q,
|
output = F.scaled_dot_product_attention(q,
|
||||||
k,
|
k,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user