mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 14:45:44 +08:00
[MISC] add cpu_kvcache_space_bytes to CacheConfig (#19812)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
parent
4c409cabc2
commit
2bb246b8f7
@ -1506,6 +1506,8 @@ class CacheConfig:
|
|||||||
"""This enables dynamic calculation of `k_scale` and `v_scale` when
|
"""This enables dynamic calculation of `k_scale` and `v_scale` when
|
||||||
kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
|
kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
|
||||||
checkpoint if available. Otherwise, the scales will default to 1.0."""
|
checkpoint if available. Otherwise, the scales will default to 1.0."""
|
||||||
|
cpu_kvcache_space_bytes: Optional[int] = None
|
||||||
|
"""(CPU backend only) CPU key-value cache space, in bytes."""
|
||||||
|
|
||||||
# Will be set after profiling.
|
# Will be set after profiling.
|
||||||
num_gpu_blocks: Optional[int] = field(default=None, init=False)
|
num_gpu_blocks: Optional[int] = field(default=None, init=False)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user