diff --git a/vllm/v1/core/single_type_kv_cache_manager.py b/vllm/v1/core/single_type_kv_cache_manager.py index e6f65da36e413..4eeea533464c9 100644 --- a/vllm/v1/core/single_type_kv_cache_manager.py +++ b/vllm/v1/core/single_type_kv_cache_manager.py @@ -787,7 +787,7 @@ class CrossAttentionManager(SingleTypeKVCacheManager): class SinkFullAttentionManager(FullAttentionManager): def __init__( self, - kv_cache_spec: KVCacheSpec, + kv_cache_spec: SinkFullAttentionSpec, block_pool: BlockPool, kv_cache_group_id: int, dcp_world_size: int = 1, diff --git a/vllm/v1/kv_cache_interface.py b/vllm/v1/kv_cache_interface.py index 656f5e7b81f55..c0ab66f7081f7 100644 --- a/vllm/v1/kv_cache_interface.py +++ b/vllm/v1/kv_cache_interface.py @@ -299,7 +299,7 @@ class CrossAttentionSpec(AttentionSpec): return cdiv(max_encoder_len, self.block_size) * self.page_size_bytes -@dataclass(forzen=True) +@dataclass(frozen=True) class SinkFullAttentionSpec(FullAttentionSpec): sink_len: int | None = None