From 26bc4bbcd8ad081de53436be657a00ea0cc9efd7 Mon Sep 17 00:00:00 2001
From: Keyun Tong <tongkeyun@gmail.com>
Date: Thu, 1 May 2025 00:30:57 -0700
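Subject: [PATCH] Avoid overwriting vllm_compile_cache.py (#17418)

Track whether the in-memory compile cache gained a new entry via a new
`is_cache_updated` flag on CompilerManager, and make `save_to_file`
return early when nothing was added, so an unchanged cache file is not
rewritten. Also format the cache contents before opening the file, so
the file is only opened in "w" mode once the data to write is ready.

Signed-off-by: Keyun Tong <tongkeyun@gmail.com>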
---
 vllm/compilation/backends.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index a1570b7eccce9..fcaf4a0f987ab 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -45,6 +45,7 @@ class CompilerManager:
         self.cache: Dict[Tuple[Optional[int], int, str], Any] = dict()
         cls = InductorAdaptor if use_inductor else EagerAdaptor
         self.compiler = cls()
+        self.is_cache_updated = False
 
     def compute_hash(self, vllm_config: VllmConfig) -> str:
         return self.compiler.compute_hash(vllm_config)
@@ -66,11 +67,11 @@ class CompilerManager:
                                        disable_cache=disable_cache)
 
     def save_to_file(self):
-        if self.disable_cache:
+        if self.disable_cache or not self.is_cache_updated:
             return
+        printer = pprint.PrettyPrinter(indent=4)
+        data = printer.pformat(self.cache)
         with open(self.cache_file_path, "w") as f:
-            printer = pprint.PrettyPrinter(indent=4)
-            data = printer.pformat(self.cache)
             f.write(data)
 
     def load(self,
@@ -131,6 +132,7 @@ class CompilerManager:
         if handle is not None:
             self.cache[(runtime_shape, graph_index,
                         self.compiler.name)] = handle
+            self.is_cache_updated = True
             if graph_index == 0:
                 # adds some info logging for the first graph
                 logger.info("Cache the graph of shape %s for later use",