diff --git a/vllm/config/cache.py b/vllm/config/cache.py
index ef6928d8ebd5c..00530846fce00 100644
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -144,7 +144,7 @@ class CacheConfig:
 
     kv_offloading_backend: KVOffloadingBackend | None = None
     """The backend to use for KV cache offloading. Supported backends include
-    'native' (vLLM native CPU offloading), 'lmcache' This option must be used
+    'native' (vLLM native CPU offloading), 'lmcache'. This option must be used
     together with kv_offloading_size."""
 
     def compute_hash(self) -> str:
@@ -167,8 +167,6 @@ class CacheConfig:
             "num_gpu_blocks_override",
             "enable_prefix_caching",
             "prefix_caching_hash_algo",
-            # `cpu_offload_gb` does not use `torch.compile` yet.
-            "cpu_offload_gb",
             "cpu_kvcache_space_bytes",
             "mamba_page_size_padded",
             # Post-init/derived counters
diff --git a/vllm/config/model.py b/vllm/config/model.py
index ce5e824da5c22..25972f097f53d 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -345,7 +345,6 @@ class ModelConfig:
             "logprobs_mode",
             "disable_cascade_attn",
             "skip_tokenizer_init",
-            "enable_prompt_embeds",
             "served_model_name",
             "config_format",
             "hf_token",