[Attention] add _cudagraph_support for linear attention (#28934)

Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
2026-03-16 16:27:15 +08:00 · 2025-11-25 12:25:20 +08:00 · 2025-11-25 12:25:20 +08:00 · 81db702ed2
commit 81db702ed2
parent 92effb07a4
1 changed files with 3 additions and 0 deletions
--- a/vllm/v1/attention/backends/linear_attn.py
+++ b/vllm/v1/attention/backends/linear_attn.py
@@ -7,6 +7,7 @@ import torch
 from vllm.attention.backends.abstract import AttentionBackend
 from vllm.config import VllmConfig
 from vllm.v1.attention.backends.utils import (
+    AttentionCGSupport,
    AttentionMetadataBuilder,
    CommonAttentionMetadata,
    split_decodes_and_prefills,
@@ -35,6 +36,8 @@ class LinearAttentionMetadata:
 class LinearAttentionMetadataBuilder(AttentionMetadataBuilder[LinearAttentionMetadata]):
    reorder_batch_threshold: int = 1

+    _cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
+
    def __init__(
        self,
        kv_cache_spec: AttentionSpec,