diff --git a/vllm/config.py b/vllm/config.py
index b36bae806c3e7..7217a659a5595 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -4495,7 +4495,6 @@ class VllmConfig:
                 "full_cuda_graph is not supported with "
                 "cascade attention. Disabling cascade attention.")
             self.model_config.disable_cascade_attn = True
-            self.cache_config.enable_prefix_caching = False
 
         if (self.kv_events_config is not None
                 and self.kv_events_config.enable_kv_cache_events