diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py index 0a14aedd5feb..e669ce4db299 100644 --- a/benchmarks/benchmark_latency.py +++ b/benchmarks/benchmark_latency.py @@ -52,7 +52,7 @@ def main(args: argparse.Namespace): llm.generate(dummy_prompts, sampling_params=sampling_params, use_tqdm=False) - print(p.key_averages()) + print(p.key_averages().table(sort_by="self_cuda_time_total")) else: start_time = time.perf_counter() llm.generate(dummy_prompts,