From 2bb246b8f7b8dd220008ff7bd735249b362c799a Mon Sep 17 00:00:00 2001
From: Ning Xie
Date: Sun, 22 Jun 2025 13:39:09 +0800
Subject: [PATCH] [MISC] add cpu_kvcache_space_bytes to CacheConfig (#19812)

Signed-off-by: Andy Xie
---
 vllm/config.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm/config.py b/vllm/config.py
index 508cdfaec1c46..ce7e2a2929cf5 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1506,6 +1506,8 @@ class CacheConfig:
     """This enables dynamic calculation of `k_scale` and `v_scale` when
     kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
     checkpoint if available. Otherwise, the scales will default to 1.0."""
+    cpu_kvcache_space_bytes: Optional[int] = None
+    """(CPU backend only) CPU key-value cache space."""
 
     # Will be set after profiling.
     num_gpu_blocks: Optional[int] = field(default=None, init=False)