From d2e841a10a273df06c8e57ba5ca8eb9cd8cce79e Mon Sep 17 00:00:00 2001
From: Kyle Yu <153807854+kyolebu@users.noreply.github.com>
Date: Mon, 7 Jul 2025 20:48:09 -0400
Subject: [PATCH] [Misc] Improve logging for dynamic shape cache compilation
 (#20573)

Signed-off-by: kyolebu <kyu@redhat.com>
---
 vllm/compilation/backends.py | 51 +++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index a2bb053cec4a..5148c289d865 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -120,10 +120,15 @@ class CompilerManager:
         handle = self.cache[(runtime_shape, graph_index, self.compiler.name)]
         compiled_graph = self.compiler.load(handle, graph, example_inputs,
                                             graph_index, runtime_shape)
-        logger.debug(
-            "Directly load the %s-th graph for shape %s from %s via "
-            "handle %s", graph_index, str(runtime_shape), self.compiler.name,
-            handle)
+        if runtime_shape is None:
+            logger.debug(
+                "Directly load the %s-th graph for dynamic shape from %s via "
+                "handle %s", graph_index, self.compiler.name, handle)
+        else:
+            logger.debug(
+                "Directly load the %s-th graph for shape %s from %s via "
+                "handle %s", graph_index, str(runtime_shape),
+                self.compiler.name, handle)
         return compiled_graph
 
     def compile(self,
@@ -152,9 +157,15 @@ class CompilerManager:
                 # there can be multiple graphs due to piecewise compilation.
                 now = time.time()
                 elapsed = now - compilation_start_time
-                logger.info(
-                    "Directly load the compiled graph(s) for shape %s "
-                    "from the cache, took %.3f s", str(runtime_shape), elapsed)
+                if runtime_shape is None:
+                    logger.info(
+                        "Directly load the compiled graph(s) for dynamic shape "
+                        "from the cache, took %.3f s", elapsed)
+                else:
+                    logger.info(
+                        "Directly load the compiled graph(s) for shape %s "
+                        "from the cache, took %.3f s", str(runtime_shape),
+                        elapsed)
             return compiled_graph
 
         # no compiler cached the graph, or the cache is disabled,
@@ -178,11 +189,21 @@ class CompilerManager:
             self.is_cache_updated = True
             if graph_index == 0:
                 # adds some info logging for the first graph
-                logger.info("Cache the graph of shape %s for later use",
-                            str(runtime_shape))
-            logger.debug(
-                "store the %s-th graph for shape %s from %s via handle %s",
-                graph_index, str(runtime_shape), self.compiler.name, handle)
+                if runtime_shape is None:
+                    logger.info(
+                        "Cache the graph for dynamic shape for later use")
+                else:
+                    logger.info("Cache the graph of shape %s for later use",
+                                str(runtime_shape))
+            if runtime_shape is None:
+                logger.debug(
+                    "Store the %s-th graph for dynamic shape from %s via "
+                    "handle %s", graph_index, self.compiler.name, handle)
+            else:
+                logger.debug(
+                    "Store the %s-th graph for shape %s from %s via handle %s",
+                    graph_index, str(runtime_shape), self.compiler.name,
+                    handle)
 
         # after compiling the last graph, record the end time
         if graph_index == num_graphs - 1:
@@ -190,7 +211,7 @@ class CompilerManager:
             elapsed = now - compilation_start_time
             compilation_config.compilation_time += elapsed
             if runtime_shape is None:
-                logger.info("Compiling a graph for general shape takes %.2f s",
+                logger.info("Compiling a graph for dynamic shape takes %.2f s",
                             elapsed)
             else:
                 logger.info("Compiling a graph for shape %s takes %.2f s",
@@ -308,7 +329,7 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter):
                 i for i, x in enumerate(args) if isinstance(x, torch.SymInt)
             ]
             global compilation_start_time
-            compiled_graph_for_general_shape = self.vllm_backend.\
+            compiled_graph_for_dynamic_shape = self.vllm_backend.\
                 compiler_manager.compile(
                 submod,
                 args,
@@ -323,7 +344,7 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter):
             self.module.__dict__[target] = piecewise_backend(
                 submod, self.vllm_config, self.graph_pool, index,
                 len(self.compile_submod_names), sym_shape_indices,
-                compiled_graph_for_general_shape, self.vllm_backend)
+                compiled_graph_for_dynamic_shape, self.vllm_backend)
 
             compilation_counter.num_piecewise_capturable_graphs_seen += 1