diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index ecb92bb1e4161..4c5a1a755c1a6 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -376,7 +376,7 @@ class TritonAttentionImpl(AttentionImpl): query.reshape( (num_tokens, num_heads * head_size)).contiguous(), layer._q_scale) - query = query.reshape((num_tokens, num_heads, head_size)) + query = query.reshape((num_tokens, num_heads, head_size)) use_local_attn = \ (self.use_irope and attn_metadata.local_attn_metadata is not None)