mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-21 12:29:21 +08:00
[Misc] [ROCm] Prevent surplus tensor reshape (#19803)
Signed-off-by: Zsolt Borbely <zsolt.borbely@htecgroup.com>
This commit is contained in:
parent
2de12be428
commit
aa20d10a91
@@ -376,7 +376,7 @@ class TritonAttentionImpl(AttentionImpl):
|
||||
query.reshape(
|
||||
(num_tokens, num_heads * head_size)).contiguous(),
|
||||
layer._q_scale)
|
||||
query = query.reshape((num_tokens, num_heads, head_size))
|
||||
query = query.reshape((num_tokens, num_heads, head_size))
|
||||
|
||||
use_local_attn = \
|
||||
(self.use_irope and attn_metadata.local_attn_metadata is not None)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user