diff --git a/vllm/v1/kv_cache_interface.py b/vllm/v1/kv_cache_interface.py index 359b5c520d44d..7a7bb9036211b 100644 --- a/vllm/v1/kv_cache_interface.py +++ b/vllm/v1/kv_cache_interface.py @@ -88,7 +88,7 @@ class FullAttentionSpec(AttentionSpec): attention in model runner. In this case, we use FullAttentionSpec and record the sliding window size. """ - + head_size_v: int | None = None sliding_window: int | None = None