mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-21 05:11:19 +08:00
Log how much time loading a compiled artifact takes (#16848)
Signed-off-by: rzou <zou3519@gmail.com>
This commit is contained in:
parent
d6195a748b
commit
682e0b6d2f
@@ -110,10 +110,14 @@ class CompilerManager:
|
||||
compiled_graph = self.load(graph, example_inputs, graph_index,
|
||||
runtime_shape)
|
||||
if compiled_graph is not None:
|
||||
if graph_index == 0:
|
||||
# adds some info logging for the first graph
|
||||
logger.info("Directly load the compiled graph for shape %s "
|
||||
"from the cache", str(runtime_shape)) # noqa
|
||||
if graph_index == num_graphs - 1:
|
||||
# after loading the last graph for this shape, record the time.
|
||||
# there can be multiple graphs due to piecewise compilation.
|
||||
now = time.time()
|
||||
elapsed = now - compilation_start_time
|
||||
logger.info(
|
||||
"Directly load the compiled graph(s) for shape %s "
|
||||
"from the cache, took %.3f s", str(runtime_shape), elapsed)
|
||||
return compiled_graph
|
||||
|
||||
# no compiler cached the graph, or the cache is disabled,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user