diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 9620bf6a79570..47b14d076ea68 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2668,7 +2668,8 @@ class GPUModelRunner(LoRAModelRunnerMixin): # TODO: Support other attention modules, e.g., cross-attention if attn_module.attn_type == AttentionType.DECODER: use_local_attention = (self.attention_chunk_size is not None - and attn_module.impl.use_irope) + and getattr(attn_module.impl, + "use_irope", False)) if attn_module.sliding_window is not None: kv_cache_spec[layer_name] = SlidingWindowSpec( block_size=block_size,