mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-27 02:14:25 +08:00
[Attention] add _cudagraph_support for linear attention (#28934)
Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
This commit is contained in:
parent
92effb07a4
commit
81db702ed2
@ -7,6 +7,7 @@ import torch
|
|||||||
from vllm.attention.backends.abstract import AttentionBackend
|
from vllm.attention.backends.abstract import AttentionBackend
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.v1.attention.backends.utils import (
|
from vllm.v1.attention.backends.utils import (
|
||||||
|
AttentionCGSupport,
|
||||||
AttentionMetadataBuilder,
|
AttentionMetadataBuilder,
|
||||||
CommonAttentionMetadata,
|
CommonAttentionMetadata,
|
||||||
split_decodes_and_prefills,
|
split_decodes_and_prefills,
|
||||||
@ -35,6 +36,8 @@ class LinearAttentionMetadata:
|
|||||||
class LinearAttentionMetadataBuilder(AttentionMetadataBuilder[LinearAttentionMetadata]):
|
class LinearAttentionMetadataBuilder(AttentionMetadataBuilder[LinearAttentionMetadata]):
|
||||||
reorder_batch_threshold: int = 1
|
reorder_batch_threshold: int = 1
|
||||||
|
|
||||||
|
_cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
kv_cache_spec: AttentionSpec,
|
kv_cache_spec: AttentionSpec,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user