From 055915e6ce0bdd3a9f7222cd23084197faaed408 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Sun, 15 Jun 2025 01:05:05 -0700
Subject: [PATCH] Enable prefix caching with full cuda graphs (#19617)

Signed-off-by: Woosuk Kwon
---
 vllm/config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vllm/config.py b/vllm/config.py
index b36bae806c3e7..7217a659a5595 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -4495,7 +4495,6 @@ class VllmConfig:
                     "full_cuda_graph is not supported with "
                     "cascade attention. Disabling cascade attention.")
                 self.model_config.disable_cascade_attn = True
-                self.cache_config.enable_prefix_caching = False
 
         if (self.kv_events_config is not None
                 and self.kv_events_config.enable_kv_cache_events
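
Not part of the patch, for context: a minimal usage sketch of the combination this change permits, namely prefix caching together with full CUDA graph capture. It assumes vLLM's LLM constructor accepts enable_prefix_caching and a compilation_config dict carrying a full_cuda_graph flag; the model name and prompts below are placeholders.

from vllm import LLM, SamplingParams

# Before this patch, turning on full_cuda_graph also forced
# enable_prefix_caching back to False in VllmConfig's post-init checks;
# with the removed line gone, both settings can be active together.
llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",      # placeholder model name
    enable_prefix_caching=True,                    # no longer overridden
    compilation_config={"full_cuda_graph": True},  # full CUDA graph capture
)

# Prompts sharing a common prefix can reuse cached KV blocks for that prefix.
prompts = [
    "You are a helpful assistant. Summarize the plot of Hamlet.",
    "You are a helpful assistant. Summarize the plot of Macbeth.",
]
for out in llm.generate(prompts, SamplingParams(max_tokens=64)):
    print(out.outputs[0].text)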