From 81db702ed28d9a6edbd59fbd0ec039e107d36bc0 Mon Sep 17 00:00:00 2001
From: Jiangyun Zhu
Date: Tue, 25 Nov 2025 12:25:20 +0800
Subject: [PATCH] [Attention] add `_cudagraph_support` for linear attention
 (#28934)

Signed-off-by: zjy0516
---
 vllm/v1/attention/backends/linear_attn.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/v1/attention/backends/linear_attn.py b/vllm/v1/attention/backends/linear_attn.py
index 1900c50849eca..004baa2d09cde 100644
--- a/vllm/v1/attention/backends/linear_attn.py
+++ b/vllm/v1/attention/backends/linear_attn.py
@@ -7,6 +7,7 @@ import torch
 from vllm.attention.backends.abstract import AttentionBackend
 from vllm.config import VllmConfig
 from vllm.v1.attention.backends.utils import (
+    AttentionCGSupport,
     AttentionMetadataBuilder,
     CommonAttentionMetadata,
     split_decodes_and_prefills,
@@ -35,6 +36,8 @@ class LinearAttentionMetadata:
 class LinearAttentionMetadataBuilder(AttentionMetadataBuilder[LinearAttentionMetadata]):
     reorder_batch_threshold: int = 1
 
+    _cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
+
     def __init__(
         self,
         kv_cache_spec: AttentionSpec,
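
For context, below is a minimal, self-contained sketch of the pattern this
one-line change relies on. The patch confirms only the names
AttentionCGSupport, UNIFORM_SINGLE_TOKEN_DECODE, and the
_cudagraph_support class attribute on the metadata builder; the other enum
members (NEVER, ALWAYS) and the can_capture() dispatch helper are
illustrative assumptions, not vLLM's actual definitions.

    import enum


    class AttentionCGSupport(enum.Enum):
        # No CUDA-graph capture for this backend (assumed default level).
        NEVER = 0
        # Safe to capture only batches where every request decodes exactly
        # one token -- the level this patch declares for linear attention.
        UNIFORM_SINGLE_TOKEN_DECODE = 1
        # Safe to capture any batch shape (assumed member).
        ALWAYS = 2


    class AttentionMetadataBuilder:
        # Conservative default: backends that do not opt in are never
        # captured into a CUDA graph.
        _cudagraph_support = AttentionCGSupport.NEVER


    class LinearAttentionMetadataBuilder(AttentionMetadataBuilder):
        # The opt-in from the patch: linear attention decode steps are
        # uniform single-token, so capture is safe for pure-decode batches.
        _cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE


    def can_capture(builder: AttentionMetadataBuilder, pure_decode: bool) -> bool:
        # Hypothetical dispatch check: capture a CUDA graph only when the
        # builder's declared support level covers the current batch shape.
        support = builder._cudagraph_support
        if support is AttentionCGSupport.ALWAYS:
            return True
        if support is AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE:
            return pure_decode
        return False


    if __name__ == "__main__":
        b = LinearAttentionMetadataBuilder()
        print(can_capture(b, pure_decode=True))   # True: decode-only batch
        print(can_capture(b, pure_decode=False))  # False: batch has prefills

The key design point is that declaring UNIFORM_SINGLE_TOKEN_DECODE lets the
engine reuse captured graphs on decode-only batches while still falling back
to eager execution for prefills, with no change to the builder's logic
beyond the class attribute.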