From ac1598d166c18658d4b3c9ec2c1efe77ff1e54d1 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Wed, 1 Oct 2025 19:28:00 -0400
Subject: [PATCH] [BugFix] ChunkedLocalAttention is currently not CG compatible (#26034)

Signed-off-by: Lucas Wilkinson
Signed-off-by: yewentao256
---
 vllm/attention/layers/chunked_local_attention.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/vllm/attention/layers/chunked_local_attention.py b/vllm/attention/layers/chunked_local_attention.py
index 3964eca7d36b8..2d11b2238e783 100644
--- a/vllm/attention/layers/chunked_local_attention.py
+++ b/vllm/attention/layers/chunked_local_attention.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import functools
-from typing import List, Optional
+from typing import ClassVar, List, Optional
 
 import torch
 
@@ -12,8 +12,8 @@ from vllm.attention.selector import get_attn_backend
 from vllm.config import CacheConfig
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.v1.attention.backends.utils import (
-    CommonAttentionMetadata, make_local_attention_virtual_batches,
-    subclass_attention_backend)
+    AttentionCGSupport, CommonAttentionMetadata,
+    make_local_attention_virtual_batches, subclass_attention_backend)
 
 from ..layer import Attention
 
@@ -29,6 +29,8 @@ def create_chunked_local_attention_backend(
     underlying_builder = underlying_attn_backend.get_builder_cls()
 
     class ChunkedLocalAttentionBuilder(underlying_builder):  # type: ignore
+        cudagraph_support: ClassVar[AttentionCGSupport] = \
+            AttentionCGSupport.NEVER
 
         def build(self,
                   common_prefix_len: int,
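
For context, the sketch below is a minimal, self-contained illustration of the pattern this patch applies: a class-level capability flag (`cudagraph_support`) is set to `NEVER` on a dynamically created builder subclass, so the runtime can skip CUDA-graph capture for that backend. It is not the actual vLLM code; `BaseBuilder`, `make_local_attention_builder`, and `can_use_cudagraph` are hypothetical stand-ins, and only the `ClassVar`/`AttentionCGSupport.NEVER` flag mirrors the diff itself.

```python
# Minimal sketch of the pattern used in the patch (names other than
# `cudagraph_support` and `AttentionCGSupport` are illustrative, not vLLM's API).
import enum
from typing import ClassVar


class AttentionCGSupport(enum.Enum):
    """Whether an attention metadata builder supports CUDA-graph capture."""
    NEVER = enum.auto()
    ALWAYS = enum.auto()


class BaseBuilder:
    # Hypothetical default: the underlying backend's builder may support CUDA graphs.
    cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.ALWAYS


def make_local_attention_builder(underlying_builder: type) -> type:
    """Subclass the underlying builder and mark it CUDA-graph incompatible,
    mirroring what the patch does for ChunkedLocalAttentionBuilder."""

    class LocalAttentionBuilder(underlying_builder):  # type: ignore[misc, valid-type]
        cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.NEVER

    return LocalAttentionBuilder


def can_use_cudagraph(builder_cls: type) -> bool:
    # Hypothetical runtime check: skip CUDA-graph capture when the builder opts out.
    return builder_cls.cudagraph_support is not AttentionCGSupport.NEVER


if __name__ == "__main__":
    local_builder = make_local_attention_builder(BaseBuilder)
    print(can_use_cudagraph(BaseBuilder))    # True  (underlying backend unchanged)
    print(can_use_cudagraph(local_builder))  # False (chunked local attention opts out)
```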