[torch.compile] add dynamo time tracking (#11005)

Signed-off-by: youkaichao <youkaichao@gmail.com>
youkaichao 2024-12-08 23:09:04 -08:00 committed by GitHub
parent af7c4a92e6
commit d1c2e15eb3
3 changed files with 16 additions and 5 deletions

View File

@@ -265,7 +265,13 @@ class VllmBackend:
def __call__(self, graph: fx.GraphModule, example_inputs) -> Callable:
# when dynamo calls the backend, it means the bytecode
# transform and analysis are done
compilation_counter.num_graphs_seen += 1
from .monitor import torch_compile_start_time
dynamo_time = time.time() - torch_compile_start_time
logger.info("Dynamo bytecode transform time: %.2f s", dynamo_time)
self.compilation_configs.compilation_time += dynamo_time
# we control the compilation process, each instance can only be
# called once
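
For readers tracing the timing logic: Dynamo only invokes the backend after its bytecode transform and analysis are finished, so the elapsed time between the recorded start marker and backend entry is the Dynamo transform time. Below is a minimal, self-contained sketch of that measurement; everything except the `torch_compile_start_time` marker and the log message is a simplified stand-in, not vLLM's actual structure.

import time

# set when the torch.compile process starts (see the monitor module below)
torch_compile_start_time: float = 0.0


def fake_dynamo_transform() -> None:
    # stand-in for Dynamo's bytecode transform and analysis
    time.sleep(0.1)


def backend_entry() -> float:
    # Dynamo calls the backend only after the transform is done, so the
    # elapsed time since the start marker is the Dynamo transform time
    dynamo_time = time.time() - torch_compile_start_time
    print(f"Dynamo bytecode transform time: {dynamo_time:.2f} s")
    return dynamo_time


torch_compile_start_time = time.time()
fake_dynamo_transform()
backend_entry()

In the hunk above, this per-graph time is then added to the running compilation_time total on the compilation config.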

View File

@@ -145,6 +145,7 @@ def _support_torch_compile(
def __init__(self, *, vllm_config: VllmConfig, prefix: str = '', **kwargs):
old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs)
self.vllm_config = vllm_config
# for CompilationLevel.DYNAMO_AS_IS, the upper level model runner
# will handle the compilation, so we don't need to do anything here.
self.do_not_compile = \
@@ -157,9 +158,6 @@ def _support_torch_compile(
TorchCompileWrapperWithCustomDispatcher.__init__(
self, compilation_level=vllm_config.compilation_config.level)
if vllm_config.compilation_config.level == CompilationLevel.PIECEWISE:
start_monitoring_torch_compile(vllm_config.compilation_config)
cls.__init__ = __init__
def __call__(self, *args, **kwargs):
@@ -186,6 +184,8 @@ def _support_torch_compile(
raise ValueError(
"Unsupported dynamic dimensions"
f" {dims} for argument {k} with type {type(arg)}.")
# here, it is the starting point of the `torch.compile` process
start_monitoring_torch_compile(self.vllm_config.compilation_config)
# if we don't use custom dispatcher, we can directly call the
# compiled function and let torch.compile handle the dispatching,
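
The net effect of moving `start_monitoring_torch_compile` out of `__init__` and into the first `__call__` is that the clock starts right before `torch.compile` is actually triggered, so model construction time is excluded from the reported total. A rough sketch of that ordering under simplified assumptions (the toy class and its attributes are illustrative, not vLLM's wrapper from `_support_torch_compile`):

import time

torch_compile_start_time: float = 0.0


def start_monitoring_torch_compile() -> None:
    # simplified: the real function also receives the compilation config
    global torch_compile_start_time
    torch_compile_start_time = time.time()


class ToyCompiledModel:
    # illustrative stand-in for a model wrapped by _support_torch_compile
    def __init__(self) -> None:
        self.seen_first_call = False  # the timer is no longer started here

    def __call__(self, x: float) -> float:
        if not self.seen_first_call:
            # first call: dynamic shapes are marked, then the clock starts,
            # and only then does the compiled callable (and Dynamo) run
            start_monitoring_torch_compile()
            self.seen_first_call = True
        return x * 2


model = ToyCompiledModel()   # construction time is not counted
print(model(3.0))            # monitoring starts on the first call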

View File

@@ -1,14 +1,19 @@
import time
from vllm.config import CompilationConfig, CompilationLevel
from vllm.logger import init_logger
logger = init_logger(__name__)
torch_compile_start_time: float = 0.0
def start_monitoring_torch_compile(compilation_config: CompilationConfig):
pass
global torch_compile_start_time
torch_compile_start_time = time.time()
def end_monitoring_torch_compile(compilation_config: CompilationConfig):
if compilation_config.level == CompilationLevel.PIECEWISE:
logger.info("graph compilation takes %.2f s in total",
logger.info("torch.compile takes %.2f s in total",
compilation_config.compilation_time)
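
Taken together, the monitor module brackets the whole compilation: `start_monitoring_torch_compile` records the start timestamp at the first call into the model, the backend adds the measured Dynamo time to `compilation_time`, and `end_monitoring_torch_compile` logs the accumulated total. A hedged end-to-end sketch of that lifecycle, where the config object and the sleep call are stand-ins and only the two monitor functions and the `compilation_time` field mirror the diff:

import time
from dataclasses import dataclass


@dataclass
class FakeCompilationConfig:
    # illustrative stand-in for vllm.config.CompilationConfig
    compilation_time: float = 0.0


torch_compile_start_time: float = 0.0


def start_monitoring_torch_compile(config: FakeCompilationConfig) -> None:
    global torch_compile_start_time
    torch_compile_start_time = time.time()


def end_monitoring_torch_compile(config: FakeCompilationConfig) -> None:
    # the real function only logs when the level is CompilationLevel.PIECEWISE
    print(f"torch.compile takes {config.compilation_time:.2f} s in total")


config = FakeCompilationConfig()
start_monitoring_torch_compile(config)    # first call into the model
time.sleep(0.05)                          # Dynamo bytecode transform runs
config.compilation_time += time.time() - torch_compile_start_time  # backend entry
end_monitoring_torch_compile(config)      # after compilation finishes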