From ac1598d166c18658d4b3c9ec2c1efe77ff1e54d1 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Wed, 1 Oct 2025 19:28:00 -0400
Subject: [PATCH] [BugFix] ChunkedLocalAttention is currently not CG compatible (#26034)

Signed-off-by: Lucas Wilkinson
Signed-off-by: yewentao256
---
 vllm/attention/layers/chunked_local_attention.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/vllm/attention/layers/chunked_local_attention.py b/vllm/attention/layers/chunked_local_attention.py
index 3964eca7d36b8..2d11b2238e783 100644
--- a/vllm/attention/layers/chunked_local_attention.py
+++ b/vllm/attention/layers/chunked_local_attention.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import functools
-from typing import List, Optional
+from typing import ClassVar, List, Optional
 
 import torch
 
@@ -12,8 +12,8 @@ from vllm.attention.selector import get_attn_backend
 from vllm.config import CacheConfig
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.v1.attention.backends.utils import (
-    CommonAttentionMetadata, make_local_attention_virtual_batches,
-    subclass_attention_backend)
+    AttentionCGSupport, CommonAttentionMetadata,
+    make_local_attention_virtual_batches, subclass_attention_backend)
 
 from ..layer import Attention
 
@@ -29,6 +29,8 @@ def create_chunked_local_attention_backend(
     underlying_builder = underlying_attn_backend.get_builder_cls()
 
     class ChunkedLocalAttentionBuilder(underlying_builder):  # type: ignore
+        cudagraph_support: ClassVar[AttentionCGSupport] = \
+            AttentionCGSupport.NEVER
 
         def build(self,
                   common_prefix_len: int,
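
For context, the sketch below is a minimal, self-contained illustration of the pattern this patch applies: a class-level capability flag (`cudagraph_support`) is set to `NEVER` on a dynamically created builder subclass, so the runtime can skip CUDA-graph capture for that backend. It is not the actual vLLM code; `BaseBuilder`, `make_local_attention_builder`, and `can_use_cudagraph` are hypothetical stand-ins, and only the `ClassVar`/`AttentionCGSupport.NEVER` flag mirrors the diff itself.

```python
# Minimal sketch of the pattern used in the patch (names other than
# `cudagraph_support` and `AttentionCGSupport` are illustrative, not vLLM's API).
import enum
from typing import ClassVar


class AttentionCGSupport(enum.Enum):
    """Whether an attention metadata builder supports CUDA-graph capture."""
    NEVER = enum.auto()
    ALWAYS = enum.auto()


class BaseBuilder:
    # Hypothetical default: the underlying backend's builder may support CUDA graphs.
    cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.ALWAYS


def make_local_attention_builder(underlying_builder: type) -> type:
    """Subclass the underlying builder and mark it CUDA-graph incompatible,
    mirroring what the patch does for ChunkedLocalAttentionBuilder."""

    class LocalAttentionBuilder(underlying_builder):  # type: ignore[misc, valid-type]
        cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.NEVER

    return LocalAttentionBuilder


def can_use_cudagraph(builder_cls: type) -> bool:
    # Hypothetical runtime check: skip CUDA-graph capture when the builder opts out.
    return builder_cls.cudagraph_support is not AttentionCGSupport.NEVER


if __name__ == "__main__":
    local_builder = make_local_attention_builder(BaseBuilder)
    print(can_use_cudagraph(BaseBuilder))    # True  (underlying backend unchanged)
    print(can_use_cudagraph(local_builder))  # False (chunked local attention opts out)
```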