mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-24 06:47:03 +08:00
[torch.compile] use size tuning for specific sizes (#10933)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
parent
9743d64e4e
commit
db87eb6c67
@@ -43,6 +43,12 @@ def wrap_inductor(graph,
|
|||||||
if additional_inductor_config is not None:
|
if additional_inductor_config is not None:
|
||||||
current_config.update(additional_inductor_config)
|
current_config.update(additional_inductor_config)
|
||||||
|
|
||||||
|
if isinstance(runtime_shape, int):
|
||||||
|
# for a specific batchsize, tuning triton kernel parameters
|
||||||
|
# can be beneficial
|
||||||
|
current_config["max_autotune"] = True
|
||||||
|
current_config["coordinate_descent_tuning"] = True
|
||||||
|
|
||||||
# inductor can inplace modify the graph, so we need to copy it
|
# inductor can inplace modify the graph, so we need to copy it
|
||||||
# see https://github.com/pytorch/pytorch/issues/138980
|
# see https://github.com/pytorch/pytorch/issues/138980
|
||||||
graph = copy.deepcopy(graph)
|
graph = copy.deepcopy(graph)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user