From aa20d10a9182677ffc419e2d823e00237f56eb0d Mon Sep 17 00:00:00 2001 From: zsolt-borbely-htec Date: Thu, 19 Jun 2025 07:57:16 +0200 Subject: [PATCH] [Misc] [ROCm] Prevent surplus tensor reshape (#19803) Signed-off-by: Zsolt Borbely --- vllm/v1/attention/backends/triton_attn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index ecb92bb1e4161..4c5a1a755c1a6 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -376,7 +376,7 @@ class TritonAttentionImpl(AttentionImpl): query.reshape( (num_tokens, num_heads * head_size)).contiguous(), layer._q_scale) - query = query.reshape((num_tokens, num_heads, head_size)) + query = query.reshape((num_tokens, num_heads, head_size)) use_local_attn = \ (self.use_irope and attn_metadata.local_attn_metadata is not None)