Avoid overwriting vllm_compile_cache.py (#17418)

Signed-off-by: Keyun Tong <tongkeyun@gmail.com>
This commit is contained in:
Keyun Tong 2025-05-01 00:30:57 -07:00 committed by GitHub
parent 3c3d767201
commit 26bc4bbcd8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -45,6 +45,7 @@ class CompilerManager:
self.cache: Dict[Tuple[Optional[int], int, str], Any] = dict()
cls = InductorAdaptor if use_inductor else EagerAdaptor
self.compiler = cls()
self.is_cache_updated = False
def compute_hash(self, vllm_config: VllmConfig) -> str:
    """Return a hash string for *vllm_config*, delegated to the compiler adaptor.

    The adaptor (InductorAdaptor or EagerAdaptor, chosen in __init__) defines
    what configuration fields feed the hash; this manager only forwards the call.
    The result presumably keys the on-disk compile cache — confirm against
    cache_file_path construction elsewhere in the file.
    """
    return self.compiler.compute_hash(vllm_config)
@@ -66,11 +67,11 @@ class CompilerManager:
disable_cache=disable_cache)
def save_to_file(self):
if self.disable_cache:
if self.disable_cache or not self.is_cache_updated:
return
printer = pprint.PrettyPrinter(indent=4)
data = printer.pformat(self.cache)
with open(self.cache_file_path, "w") as f:
printer = pprint.PrettyPrinter(indent=4)
data = printer.pformat(self.cache)
f.write(data)
def load(self,
@@ -131,6 +132,7 @@ class CompilerManager:
if handle is not None:
self.cache[(runtime_shape, graph_index,
self.compiler.name)] = handle
self.is_cache_updated = True
if graph_index == 0:
# adds some info logging for the first graph
logger.info("Cache the graph of shape %s for later use",