From 682e0b6d2fbef012abde9b90c73da23f3065e2b8 Mon Sep 17 00:00:00 2001 From: Richard Zou Date: Sat, 19 Apr 2025 12:50:46 -0400 Subject: [PATCH] Log how much time loading a compiled artifact takes (#16848) Signed-off-by: rzou --- vllm/compilation/backends.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 45988c2e9b0d4..c493a764f56d6 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -110,10 +110,14 @@ class CompilerManager: compiled_graph = self.load(graph, example_inputs, graph_index, runtime_shape) if compiled_graph is not None: - if graph_index == 0: - # adds some info logging for the first graph - logger.info("Directly load the compiled graph for shape %s " - "from the cache", str(runtime_shape)) # noqa + if graph_index == num_graphs - 1: + # after loading the last graph for this shape, record the time. + # there can be multiple graphs due to piecewise compilation. + now = time.time() + elapsed = now - compilation_start_time + logger.info( + "Directly load the compiled graph(s) for shape %s " + "from the cache, took %.3f s", str(runtime_shape), elapsed) return compiled_graph # no compiler cached the graph, or the cache is disabled,