[torch.compile] use size tuning for specific sizes (#10933)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2026-06-11 14:35:43 +08:00 · 2024-12-05 20:30:41 -08:00 · 2024-12-05 20:30:41 -08:00 · db87eb6c67
commit db87eb6c67
parent 9743d64e4e
1 changed files with 6 additions and 0 deletions
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -43,6 +43,12 @@ def wrap_inductor(graph,
    if additional_inductor_config is not None:
        current_config.update(additional_inductor_config)
    if isinstance(runtime_shape, int):
        # for a specific batchsize, tuning triton kernel parameters
        # can be beneficial
        current_config["max_autotune"] = True
        current_config["coordinate_descent_tuning"] = True
    # inductor can inplace modify the graph, so we need to copy it
    # see https://github.com/pytorch/pytorch/issues/138980
    graph = copy.deepcopy(graph)