[MISC] add cpu_kvcache_space_bytes to CacheConfig (#19812)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
2025-12-13 14:45:44 +08:00 · 2025-06-22 13:39:09 +08:00 · 2025-06-22 13:39:09 +08:00 · 2bb246b8f7
commit 2bb246b8f7
parent 4c409cabc2
1 changed file with 2 additions and 0 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1506,6 +1506,8 @@ class CacheConfig:
    """This enables dynamic calculation of `k_scale` and `v_scale` when
    kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
    checkpoint if available. Otherwise, the scales will default to 1.0."""
    cpu_kvcache_space_bytes: Optional[int] = None
    """(CPU backend only) CPU key-value cache space."""
    # Will be set after profiling.
    num_gpu_blocks: Optional[int] = field(default=None, init=False)