[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)

Signed-off-by: zhxchen17 <zhxchen17@fb.com>
Zhengxu Chen 2025-11-25 16:46:41 -05:00 committed by GitHub
parent 4e57c6587f
commit 0abc79482a
2 changed files with 1 addition and 4 deletions
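Both files edit the same mechanism: each config class contributes a compute_hash() to the torch.compile cache key, and every field is hashed unless it is named in an explicit ignore list. Deleting `cpu_offload_gb` and `enable_prompt_embeds` from those lists therefore adds them to the compile hash, as the commit title says. Below is a minimal sketch of that ignored-factors pattern, assuming a dataclass-style config and an MD5 digest; the `ignored_factors` name and the helper shape are illustrative assumptions, not vLLM's exact code.

    import hashlib
    from dataclasses import dataclass, fields

    @dataclass
    class CacheConfig:
        cpu_offload_gb: float = 0.0          # hashed after this commit
        enable_prefix_caching: bool = True   # still ignored below

        # Fields assumed not to change the compiled graph are skipped.
        ignored_factors = {"enable_prefix_caching"}

        def compute_hash(self) -> str:
            # Hash every field except the explicitly ignored ones; removing
            # a name from `ignored_factors` pulls that field into the key.
            factors = [
                (f.name, getattr(self, f.name))
                for f in fields(self)
                if f.name not in self.ignored_factors
            ]
            return hashlib.md5(str(factors).encode()).hexdigest()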


@@ -144,7 +144,7 @@ class CacheConfig:
     kv_offloading_backend: KVOffloadingBackend | None = None
     """The backend to use for KV cache offloading. Supported backends include
-    'native' (vLLM native CPU offloading), 'lmcache' This option must be used
+    'native' (vLLM native CPU offloading), 'lmcache'. This option must be used
     together with kv_offloading_size."""

     def compute_hash(self) -> str:
@@ -167,8 +167,6 @@ class CacheConfig:
             "num_gpu_blocks_override",
             "enable_prefix_caching",
             "prefix_caching_hash_algo",
-            # `cpu_offload_gb` does not use `torch.compile` yet.
-            "cpu_offload_gb",
             "cpu_kvcache_space_bytes",
             "mamba_page_size_padded",
             # Post-init/derived counters
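With `cpu_offload_gb` no longer ignored, two CacheConfig instances that differ only in offload size now map to different compiled-artifact keys. Continuing the hypothetical sketch above:

    a = CacheConfig(cpu_offload_gb=0.0)
    b = CacheConfig(cpu_offload_gb=4.0)
    assert a.compute_hash() != b.compute_hash()  # distinct compile-cache entries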


@@ -345,7 +345,6 @@ class ModelConfig:
             "logprobs_mode",
             "disable_cascade_attn",
             "skip_tokenizer_init",
-            "enable_prompt_embeds",
             "served_model_name",
             "config_format",
             "hf_token",