diff --git a/vllm/v1/attention/backends/linear_attn.py b/vllm/v1/attention/backends/linear_attn.py index 1900c50849eca..004baa2d09cde 100644 --- a/vllm/v1/attention/backends/linear_attn.py +++ b/vllm/v1/attention/backends/linear_attn.py @@ -7,6 +7,7 @@ import torch from vllm.attention.backends.abstract import AttentionBackend from vllm.config import VllmConfig from vllm.v1.attention.backends.utils import ( + AttentionCGSupport, AttentionMetadataBuilder, CommonAttentionMetadata, split_decodes_and_prefills, @@ -35,6 +36,8 @@ class LinearAttentionMetadata: class LinearAttentionMetadataBuilder(AttentionMetadataBuilder[LinearAttentionMetadata]): reorder_batch_threshold: int = 1 + _cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE + def __init__( self, kv_cache_spec: AttentionSpec,