From 26bc4bbcd8ad081de53436be657a00ea0cc9efd7 Mon Sep 17 00:00:00 2001 From: Keyun Tong Date: Thu, 1 May 2025 00:30:57 -0700 Subject: [PATCH] Avoid overwriting vllm_compile_cache.py (#17418) Signed-off-by: Keyun Tong --- vllm/compilation/backends.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index a1570b7eccce9..fcaf4a0f987ab 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -45,6 +45,7 @@ class CompilerManager: self.cache: Dict[Tuple[Optional[int], int, str], Any] = dict() cls = InductorAdaptor if use_inductor else EagerAdaptor self.compiler = cls() + self.is_cache_updated = False def compute_hash(self, vllm_config: VllmConfig) -> str: return self.compiler.compute_hash(vllm_config) @@ -66,11 +67,11 @@ class CompilerManager: disable_cache=disable_cache) def save_to_file(self): - if self.disable_cache: + if self.disable_cache or not self.is_cache_updated: return + printer = pprint.PrettyPrinter(indent=4) + data = printer.pformat(self.cache) with open(self.cache_file_path, "w") as f: - printer = pprint.PrettyPrinter(indent=4) - data = printer.pformat(self.cache) f.write(data) def load(self, @@ -131,6 +132,7 @@ class CompilerManager: if handle is not None: self.cache[(runtime_shape, graph_index, self.compiler.name)] = handle + self.is_cache_updated = True if graph_index == 0: # adds some info logging for the first graph logger.info("Cache the graph of shape %s for later use",