From 468e2400feff561a7e8b5d4c455612662448fe72 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Sat, 19 Jul 2025 02:18:48 -0400
Subject: [PATCH] [BugFix][CPU] Fix `TorchSDPABackendImpl` doesn't have
 `use_irope` (#21200)

Signed-off-by: Lucas Wilkinson
---
 vllm/v1/worker/gpu_model_runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 9620bf6a79570..47b14d076ea68 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2668,7 +2668,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
             # TODO: Support other attention modules, e.g., cross-attention
             if attn_module.attn_type == AttentionType.DECODER:
                 use_local_attention = (self.attention_chunk_size is not None
-                                       and attn_module.impl.use_irope)
+                                       and getattr(attn_module.impl,
+                                                   "use_irope", False))
                 if attn_module.sliding_window is not None:
                     kv_cache_spec[layer_name] = SlidingWindowSpec(
                         block_size=block_size,
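
For context, a minimal sketch of the failure mode this patch addresses: an attention impl class (such as the CPU `TorchSDPABackendImpl`) that never defines `use_irope` raises AttributeError on direct access, while `getattr` with a `False` default degrades gracefully. The stub classes below are hypothetical stand-ins, not the real vLLM backend implementations.

    # Hypothetical stand-ins; only the presence/absence of `use_irope` matters.
    class GPUImplStub:
        """Impl that explicitly tracks iRoPE support (attribute is defined)."""
        use_irope = True


    class CPUImplStub:
        """Impl that never defined the attribute (like TorchSDPABackendImpl)."""


    def wants_local_attention(impl, attention_chunk_size):
        # Direct access (`impl.use_irope`) would raise AttributeError for
        # CPUImplStub; getattr with a False default avoids the crash.
        return (attention_chunk_size is not None
                and getattr(impl, "use_irope", False))


    if __name__ == "__main__":
        print(wants_local_attention(GPUImplStub(), 8192))   # True
        print(wants_local_attention(CPUImplStub(), 8192))   # False, no AttributeError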