[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)

Signed-off-by: zhxchen17 <zhxchen17@fb.com>
Zhengxu Chen 2025-11-25 16:46:41 -05:00 committed by GitHub
parent 4e57c6587f
commit 0abc79482a
2 changed files with 1 addition and 4 deletions
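Both files edit the same mechanism: each config class contributes a compute_hash() to the torch.compile cache key, and every field is hashed unless it is named in an explicit ignore list. Deleting `cpu_offload_gb` and `enable_prompt_embeds` from those lists therefore adds them to the compile hash, as the commit title says. Below is a minimal sketch of that ignored-factors pattern, assuming a dataclass-style config and an MD5 digest; the `ignored_factors` name and the helper shape are illustrative assumptions, not vLLM's exact code.

    import hashlib
    from dataclasses import dataclass, fields

    @dataclass
    class CacheConfig:
        cpu_offload_gb: float = 0.0          # hashed after this commit
        enable_prefix_caching: bool = True   # still ignored below

        # Fields assumed not to change the compiled graph are skipped.
        ignored_factors = {"enable_prefix_caching"}

        def compute_hash(self) -> str:
            # Hash every field except the explicitly ignored ones; removing
            # a name from `ignored_factors` pulls that field into the key.
            factors = [
                (f.name, getattr(self, f.name))
                for f in fields(self)
                if f.name not in self.ignored_factors
            ]
            return hashlib.md5(str(factors).encode()).hexdigest()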


@@ -144,7 +144,7 @@ class CacheConfig:
     kv_offloading_backend: KVOffloadingBackend | None = None
     """The backend to use for KV cache offloading. Supported backends include
-    'native' (vLLM native CPU offloading), 'lmcache' This option must be used
+    'native' (vLLM native CPU offloading), 'lmcache'. This option must be used
     together with kv_offloading_size."""

     def compute_hash(self) -> str:
@@ -167,8 +167,6 @@ class CacheConfig:
             "num_gpu_blocks_override",
             "enable_prefix_caching",
             "prefix_caching_hash_algo",
-            # `cpu_offload_gb` does not use `torch.compile` yet.
-            "cpu_offload_gb",
             "cpu_kvcache_space_bytes",
             "mamba_page_size_padded",
             # Post-init/derived counters
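With `cpu_offload_gb` no longer ignored, two CacheConfig instances that differ only in offload size now map to different compiled-artifact keys. Continuing the hypothetical sketch above:

    a = CacheConfig(cpu_offload_gb=0.0)
    b = CacheConfig(cpu_offload_gb=4.0)
    assert a.compute_hash() != b.compute_hash()  # distinct compile-cache entries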


@@ -345,7 +345,6 @@ class ModelConfig:
             "logprobs_mode",
             "disable_cascade_attn",
             "skip_tokenizer_init",
-            "enable_prompt_embeds",
             "served_model_name",
             "config_format",
             "hf_token",