mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-24 03:37:54 +08:00
[Misc] [ROCm] Prevent surplus tensor reshape (#19803)
Signed-off-by: Zsolt Borbely <zsolt.borbely@htecgroup.com>
This commit is contained in:
parent
2de12be428
commit
aa20d10a91
@@ -376,7 +376,7 @@ class TritonAttentionImpl(AttentionImpl):
|
|||||||
query.reshape(
|
query.reshape(
|
||||||
(num_tokens, num_heads * head_size)).contiguous(),
|
(num_tokens, num_heads * head_size)).contiguous(),
|
||||||
layer._q_scale)
|
layer._q_scale)
|
||||||
query = query.reshape((num_tokens, num_heads, head_size))
|
query = query.reshape((num_tokens, num_heads, head_size))
|
||||||
|
|
||||||
use_local_attn = \
|
use_local_attn = \
|
||||||
(self.use_irope and attn_metadata.local_attn_metadata is not None)
|
(self.use_irope and attn_metadata.local_attn_metadata is not None)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user