[BugFix][CPU] Fix AttributeError when TorchSDPABackendImpl lacks use_irope (#21200)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
2026-01-28 13:27:15 +08:00 · 2025-07-19 02:18:48 -04:00 · 2025-07-19 02:18:48 -04:00 · 468e2400fe
commit 468e2400fe
parent dcc6cfb991
1 changed files with 2 additions and 1 deletions
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2668,7 +2668,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
            # TODO: Support other attention modules, e.g., cross-attention
            if attn_module.attn_type == AttentionType.DECODER:
                use_local_attention = (self.attention_chunk_size is not None
-                                       and attn_module.impl.use_irope)
+                                       and getattr(attn_module.impl,
+                                                   "use_irope", False))
                if attn_module.sliding_window is not None:
                    kv_cache_spec[layer_name] = SlidingWindowSpec(
                        block_size=block_size,