[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)

Signed-off-by: zhxchen17 <zhxchen17@fb.com>
2025-12-17 07:05:01 +08:00 · 2025-11-25 16:46:41 -05:00 · 2025-11-25 16:46:41 -05:00 · 0abc79482a
commit 0abc79482a
parent 4e57c6587f
2 changed files with 1 additions and 4 deletions
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -167,8 +167,6 @@ class CacheConfig:
            "num_gpu_blocks_override",
            "enable_prefix_caching",
            "prefix_caching_hash_algo",
            # `cpu_offload_gb` does not use `torch.compile` yet.
            "cpu_offload_gb",
            "cpu_kvcache_space_bytes",
            "mamba_page_size_padded",
            # Post-init/derived counters
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -345,7 +345,6 @@ class ModelConfig:
            "logprobs_mode",
            "disable_cascade_attn",
            "skip_tokenizer_init",
            "enable_prompt_embeds",
            "served_model_name",
            "config_format",
            "hf_token",