From d04973ad5446fe05c06035f6b2d99402fc3ac7bf Mon Sep 17 00:00:00 2001
From: Bellk17 <Kyletbell@ymail.com>
Date: Fri, 12 Apr 2024 16:41:26 -0700
Subject: [PATCH] Fix triton compilation issue (#3984)

Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
---
 vllm/attention/ops/triton_flash_attention.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index 87cf30cbef79a..e160411859f0b 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -415,7 +415,11 @@ def attn_fwd(
             return
 
     is_mqa = hq != hk
-    off_h_k = off_h_q % hk if is_mqa else off_h_q
+    if is_mqa:  # noqa: SIM108
+        off_h_k = off_h_q % hk
+    else:
+        off_h_k = off_h_q
+
     n_extra_tokens = 0
     if seqlen_k < BLOCK_N:
         n_extra_tokens = BLOCK_N - seqlen_k