Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Bug] Fix usage of .transpose() and .view() consecutively. (#11979)
commit 9dd02d85ca
parent f7b3ba82c3
@@ -230,7 +230,7 @@ class MultiHeadAttention(nn.Module):
                                              value,
                                              scale=self.scale)
         out = out.transpose(1, 2)
-        return out.view(bsz, q_len, -1)
+        return out.reshape(bsz, q_len, -1)


 def unified_attention(
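A minimal PyTorch sketch (illustrative shapes, not taken from the vLLM source) of the failure mode this hunk fixes: .transpose() returns a non-contiguous view of the tensor, and .view() raises a RuntimeError when the requested shape cannot be expressed over that strided layout, while .reshape() copies only when the layout requires it.

import torch

# Illustrative shapes only; the real values come from the attention config.
bsz, num_heads, q_len, head_dim = 2, 4, 8, 16
out = torch.randn(bsz, num_heads, q_len, head_dim)

out = out.transpose(1, 2)        # shape (bsz, q_len, num_heads, head_dim), non-contiguous
print(out.is_contiguous())       # False

try:
    out.view(bsz, q_len, -1)     # fails: the strided layout cannot be viewed as this shape
except RuntimeError as err:
    print("view() failed:", err)

flat = out.reshape(bsz, q_len, -1)   # succeeds; copies because the input is non-contiguous
print(flat.shape)                    # torch.Size([2, 8, 64])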
@@ -271,7 +271,7 @@ class InternSdpaAttention(nn.Module):
         v = v.transpose(1, 2)

         x = F.scaled_dot_product_attention(q, k, v, scale=self.scale)
-        x = x.transpose(1, 2).view(B, N, -1)
+        x = x.transpose(1, 2).reshape(B, N, -1)

         x = self.proj(x)
         return x
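The second hunk applies the same fix in InternSdpaAttention. As a note on the design choice (not stated in the commit itself): .contiguous().view(...) would also work, but .reshape(...) is equivalent here and skips the extra copy whenever the tensor already happens to be contiguous. Sketch with assumed shapes:

import torch

B, num_heads, N, head_dim = 2, 4, 8, 16           # assumed, for illustration only
x = torch.randn(B, num_heads, N, head_dim)
x_t = x.transpose(1, 2)                           # non-contiguous view, shape (B, N, num_heads, head_dim)

via_reshape = x_t.reshape(B, N, -1)               # copies only if the layout requires it
via_contiguous = x_t.contiguous().view(B, N, -1)  # always materializes a contiguous copy first

print(torch.equal(via_reshape, via_contiguous))   # True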