Enable prefix caching with full cuda graphs (#19617)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Woosuk Kwon, 2025-06-15 01:05:05 -07:00, committed by GitHub
parent 3d330c4c09
commit 055915e6ce


@@ -4495,7 +4495,6 @@ class VllmConfig:
                 "full_cuda_graph is not supported with "
                 "cascade attention. Disabling cascade attention.")
             self.model_config.disable_cascade_attn = True
-            self.cache_config.enable_prefix_caching = False
         if (self.kv_events_config is not None
                 and self.kv_events_config.enable_kv_cache_events
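
With the line that force-disabled prefix caching removed, prefix caching and full CUDA graph capture can be requested together. Below is a minimal sketch of such a launch, assuming a vLLM build that contains this commit and accepts compilation_config as a dict of CompilationConfig fields; the model name and sampling settings are illustrative and not part of the commit.

from vllm import LLM, SamplingParams

# Sketch: request full CUDA graph capture together with prefix caching,
# which this commit no longer force-disables. Cascade attention is still
# turned off automatically when full_cuda_graph is set, per the warning above.
llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model, not from this commit
    enable_prefix_caching=True,
    compilation_config={"full_cuda_graph": True},
)

# Prompts sharing a common prefix can now reuse cached KV blocks.
prompts = [
    "You are a helpful assistant. Summarize the theory of relativity.",
    "You are a helpful assistant. Summarize plate tectonics.",
]
for out in llm.generate(prompts, SamplingParams(max_tokens=64)):
    print(out.outputs[0].text)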