From 468e2400feff561a7e8b5d4c455612662448fe72 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Sat, 19 Jul 2025 02:18:48 -0400
Subject: [PATCH] [BugFix][CPU] Fix `TorchSDPABackendImpl` doesn't have
 `use_irope` (#21200)

Signed-off-by: Lucas Wilkinson
---
 vllm/v1/worker/gpu_model_runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 9620bf6a79570..47b14d076ea68 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2668,7 +2668,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
             # TODO: Support other attention modules, e.g., cross-attention
             if attn_module.attn_type == AttentionType.DECODER:
                 use_local_attention = (self.attention_chunk_size is not None
-                                       and attn_module.impl.use_irope)
+                                       and getattr(attn_module.impl,
+                                                   "use_irope", False))
                 if attn_module.sliding_window is not None:
                     kv_cache_spec[layer_name] = SlidingWindowSpec(
                         block_size=block_size,
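
For context, a minimal sketch of the failure mode this patch addresses: an attention impl class (such as the CPU `TorchSDPABackendImpl`) that never defines `use_irope` raises AttributeError on direct access, while `getattr` with a `False` default degrades gracefully. The stub classes below are hypothetical stand-ins, not the real vLLM backend implementations.

    # Hypothetical stand-ins; only the presence/absence of `use_irope` matters.
    class GPUImplStub:
        """Impl that explicitly tracks iRoPE support (attribute is defined)."""
        use_irope = True


    class CPUImplStub:
        """Impl that never defined the attribute (like TorchSDPABackendImpl)."""


    def wants_local_attention(impl, attention_chunk_size):
        # Direct access (`impl.use_irope`) would raise AttributeError for
        # CPUImplStub; getattr with a False default avoids the crash.
        return (attention_chunk_size is not None
                and getattr(impl, "use_irope", False))


    if __name__ == "__main__":
        print(wants_local_attention(GPUImplStub(), 8192))   # True
        print(wants_local_attention(CPUImplStub(), 8192))   # False, no AttributeError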