Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-24 18:15:48 +08:00)
[torch.compile] add dynamo time tracking (#11005)

Signed-off-by: youkaichao <youkaichao@gmail.com>

parent: af7c4a92e6
commit: d1c2e15eb3
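A note on the pattern: Dynamo invokes the compile backend only after it has finished transforming and analyzing the Python bytecode, so a timestamp taken just before `torch.compile` runs, read again at backend entry, isolates the Dynamo overhead. The first hunk below applies this inside VllmBackend; here is a minimal self-contained sketch of the same idea using only stock PyTorch (timing_backend, compile_start_time, and f are made-up names, not vLLM APIs):

import time

import torch

compile_start_time: float = 0.0


def timing_backend(gm: torch.fx.GraphModule, example_inputs):
    # Dynamo calls this only after bytecode transform and analysis are
    # done, so the elapsed time approximates the Dynamo overhead.
    dynamo_time = time.time() - compile_start_time
    print(f"Dynamo bytecode transform time: {dynamo_time:.2f} s")
    return gm.forward  # skip real backend work; run the captured graph as-is


def f(x):
    return torch.relu(x) + 1


compile_start_time = time.time()  # "start monitoring", as in monitor.py below
compiled_f = torch.compile(f, backend=timing_backend)
compiled_f(torch.randn(4))  # the first call triggers Dynamo, then the backend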
vllm/compilation/backends.py

@@ -265,7 +265,13 @@ class VllmBackend:
 
     def __call__(self, graph: fx.GraphModule, example_inputs) -> Callable:
 
+        # when dynamo calls the backend, it means the bytecode
+        # transform and analysis are done
         compilation_counter.num_graphs_seen += 1
+        from .monitor import torch_compile_start_time
+        dynamo_time = time.time() - torch_compile_start_time
+        logger.info("Dynamo bytecode transform time: %.2f s", dynamo_time)
+        self.compilation_configs.compilation_time += dynamo_time
 
         # we control the compilation process, each instance can only be
         # called once
vllm/compilation/decorators.py

@@ -145,6 +145,7 @@ def _support_torch_compile(
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = '', **kwargs):
         old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs)
+        self.vllm_config = vllm_config
         # for CompilationLevel.DYNAMO_AS_IS , the upper level model runner
         # will handle the compilation, so we don't need to do anything here.
         self.do_not_compile = \
@@ -157,9 +158,6 @@ def _support_torch_compile(
         TorchCompileWrapperWithCustomDispatcher.__init__(
             self, compilation_level=vllm_config.compilation_config.level)
 
-        if vllm_config.compilation_config.level == CompilationLevel.PIECEWISE:
-            start_monitoring_torch_compile(vllm_config.compilation_config)
-
     cls.__init__ = __init__
 
     def __call__(self, *args, **kwargs):
@@ -186,6 +184,8 @@ def _support_torch_compile(
                 raise ValueError(
                     "Unsupported dynamic dimensions"
                     f" {dims} for argument {k} with type {type(arg)}.")
+            # here, it is the starting point of the `torch.compile` process
+            start_monitoring_torch_compile(self.vllm_config.compilation_config)
 
             # if we don't use custom dispatcher, we can directly call the
             # compiled function and let torch.compile handle the dispatching,
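The three hunks above move start_monitoring_torch_compile from __init__ (where it ran only for CompilationLevel.PIECEWISE) into __call__, right before the compiled callable is first dispatched; __init__ now also stashes vllm_config so __call__ can reach the compilation config. The point of the move: arbitrary setup can happen between constructing a model and its first forward pass, and timing from __init__ would count that gap as compilation time. A toy sketch of the difference, with invented names (TimedWrapper is not vLLM code):

import time


class TimedWrapper:
    """Toy stand-in for the decorated model class; not vLLM code."""

    def __init__(self, fn):
        self.fn = fn
        self.init_time = time.time()  # what the old placement measured from
        self.call_time = None         # what the new placement measures from

    def __call__(self, *args):
        if self.call_time is None:
            # here, it is the starting point of the "compilation" window
            self.call_time = time.time()
        return self.fn(*args)


w = TimedWrapper(lambda x: x * 2)
time.sleep(0.2)  # unrelated work between __init__ and the first call
w(21)
print(f"window from __init__: {time.time() - w.init_time:.2f} s (overcounts)")
print(f"window from __call__: {time.time() - w.call_time:.2f} s (accurate)")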
vllm/compilation/monitor.py

@@ -1,14 +1,19 @@
+import time
+
 from vllm.config import CompilationConfig, CompilationLevel
 from vllm.logger import init_logger
 
 logger = init_logger(__name__)
 
+torch_compile_start_time: float = 0.0
+
 
 def start_monitoring_torch_compile(compilation_config: CompilationConfig):
-    pass
+    global torch_compile_start_time
+    torch_compile_start_time = time.time()
 
 
 def end_monitoring_torch_compile(compilation_config: CompilationConfig):
     if compilation_config.level == CompilationLevel.PIECEWISE:
-        logger.info("graph compilation takes %.2f s in total",
+        logger.info("torch.compile takes %.2f s in total",
                     compilation_config.compilation_time)
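End to end, the flow is: start_monitoring_torch_compile stamps the module-level torch_compile_start_time when __call__ first dispatches into torch.compile; VllmBackend.__call__ later computes the delta (the Dynamo bytecode transform time) and adds it to the config's compilation_time; end_monitoring_torch_compile reports the accumulated total. A minimal sketch of that accumulation pattern outside vLLM, with sleeps standing in for real compilation phases (all names here are illustrative):

import time

start_time: float = 0.0              # role of torch_compile_start_time
total_compilation_time: float = 0.0  # role of
                                     # compilation_config.compilation_time


def start_monitoring() -> None:
    global start_time
    start_time = time.time()


def end_monitoring() -> None:
    print(f"torch.compile takes {total_compilation_time:.2f} s in total")


start_monitoring()
time.sleep(0.1)  # stands in for the Dynamo bytecode transform
total_compilation_time += time.time() - start_time

backend_start = time.time()
time.sleep(0.2)  # stands in for backend graph compilation
total_compilation_time += time.time() - backend_start

end_monitoring()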