mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 02:25:36 +08:00
[V1] fix torch profiling for V1 offline scenarios (#18445)
Signed-off-by: Divakar Verma <divakar.verma@amd.com>
This commit is contained in:
parent
9a21e331ff
commit
774c5fde30
@ -6,13 +6,12 @@ import dataclasses
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
import vllm.envs as envs
|
||||||
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
|
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
@ -80,17 +79,9 @@ def main(args: argparse.Namespace):
|
|||||||
|
|
||||||
def run_to_completion(profile_dir: Optional[str] = None):
|
def run_to_completion(profile_dir: Optional[str] = None):
|
||||||
if profile_dir:
|
if profile_dir:
|
||||||
with torch.profiler.profile(
|
llm.start_profile()
|
||||||
activities=[
|
llm_generate()
|
||||||
torch.profiler.ProfilerActivity.CPU,
|
llm.stop_profile()
|
||||||
torch.profiler.ProfilerActivity.CUDA,
|
|
||||||
],
|
|
||||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
|
||||||
str(profile_dir)
|
|
||||||
),
|
|
||||||
) as p:
|
|
||||||
llm_generate()
|
|
||||||
print(p.key_averages().table(sort_by="self_cuda_time_total"))
|
|
||||||
else:
|
else:
|
||||||
start_time = time.perf_counter()
|
start_time = time.perf_counter()
|
||||||
llm_generate()
|
llm_generate()
|
||||||
@ -103,11 +94,7 @@ def main(args: argparse.Namespace):
|
|||||||
run_to_completion(profile_dir=None)
|
run_to_completion(profile_dir=None)
|
||||||
|
|
||||||
if args.profile:
|
if args.profile:
|
||||||
profile_dir = args.profile_result_dir
|
profile_dir = envs.VLLM_TORCH_PROFILER_DIR
|
||||||
if not profile_dir:
|
|
||||||
profile_dir = (
|
|
||||||
Path(".") / "vllm_benchmark_result" / f"latency_result_{time.time()}"
|
|
||||||
)
|
|
||||||
print(f"Profiling (results will be saved to '{profile_dir}')...")
|
print(f"Profiling (results will be saved to '{profile_dir}')...")
|
||||||
run_to_completion(profile_dir=profile_dir)
|
run_to_completion(profile_dir=profile_dir)
|
||||||
return
|
return
|
||||||
@ -164,15 +151,6 @@ if __name__ == "__main__":
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="profile the generation process of a single batch",
|
help="profile the generation process of a single batch",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--profile-result-dir",
|
|
||||||
type=str,
|
|
||||||
default=None,
|
|
||||||
help=(
|
|
||||||
"path to save the pytorch profiler output. Can be visualized "
|
|
||||||
"with ui.perfetto.dev or Tensorboard."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output-json",
|
"--output-json",
|
||||||
type=str,
|
type=str,
|
||||||
@ -193,4 +171,9 @@ if __name__ == "__main__":
|
|||||||
# numbers. We need to disable prefix caching by default.
|
# numbers. We need to disable prefix caching by default.
|
||||||
parser.set_defaults(enable_prefix_caching=False)
|
parser.set_defaults(enable_prefix_caching=False)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
if args.profile and not envs.VLLM_TORCH_PROFILER_DIR:
|
||||||
|
raise OSError(
|
||||||
|
"The environment variable 'VLLM_TORCH_PROFILER_DIR' is not set. "
|
||||||
|
"Please set it to a valid path to use torch profiler."
|
||||||
|
)
|
||||||
main(args)
|
main(args)
|
||||||
|
|||||||
@ -6,13 +6,12 @@ import dataclasses
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
import vllm.envs as envs
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
from vllm.benchmarks.utils import (convert_to_pytorch_benchmark_format,
|
from vllm.benchmarks.utils import (convert_to_pytorch_benchmark_format,
|
||||||
write_to_json)
|
write_to_json)
|
||||||
@ -59,13 +58,6 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="profile the generation process of a single batch",
|
help="profile the generation process of a single batch",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--profile-result-dir",
|
|
||||||
type=str,
|
|
||||||
default=None,
|
|
||||||
help=("path to save the pytorch profiler output. Can be visualized "
|
|
||||||
"with ui.perfetto.dev or Tensorboard."),
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output-json",
|
"--output-json",
|
||||||
type=str,
|
type=str,
|
||||||
@ -87,7 +79,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
|||||||
|
|
||||||
def main(args: argparse.Namespace):
|
def main(args: argparse.Namespace):
|
||||||
print(args)
|
print(args)
|
||||||
|
if args.profile and not envs.VLLM_TORCH_PROFILER_DIR:
|
||||||
|
raise OSError(
|
||||||
|
"The environment variable 'VLLM_TORCH_PROFILER_DIR' is not set. "
|
||||||
|
"Please set it to a valid path to use torch profiler.")
|
||||||
engine_args = EngineArgs.from_cli_args(args)
|
engine_args = EngineArgs.from_cli_args(args)
|
||||||
|
|
||||||
# NOTE(woosuk): If the request cannot be processed in a single batch,
|
# NOTE(woosuk): If the request cannot be processed in a single batch,
|
||||||
@ -131,16 +126,9 @@ def main(args: argparse.Namespace):
|
|||||||
|
|
||||||
def run_to_completion(profile_dir: Optional[str] = None):
|
def run_to_completion(profile_dir: Optional[str] = None):
|
||||||
if profile_dir:
|
if profile_dir:
|
||||||
with torch.profiler.profile(
|
llm.start_profile()
|
||||||
activities=[
|
llm_generate()
|
||||||
torch.profiler.ProfilerActivity.CPU,
|
llm.stop_profile()
|
||||||
torch.profiler.ProfilerActivity.CUDA,
|
|
||||||
],
|
|
||||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
|
||||||
str(profile_dir)),
|
|
||||||
) as p:
|
|
||||||
llm_generate()
|
|
||||||
print(p.key_averages().table(sort_by="self_cuda_time_total"))
|
|
||||||
else:
|
else:
|
||||||
start_time = time.perf_counter()
|
start_time = time.perf_counter()
|
||||||
llm_generate()
|
llm_generate()
|
||||||
@ -153,10 +141,7 @@ def main(args: argparse.Namespace):
|
|||||||
run_to_completion(profile_dir=None)
|
run_to_completion(profile_dir=None)
|
||||||
|
|
||||||
if args.profile:
|
if args.profile:
|
||||||
profile_dir = args.profile_result_dir
|
profile_dir = envs.VLLM_TORCH_PROFILER_DIR
|
||||||
if not profile_dir:
|
|
||||||
profile_dir = (Path(".") / "vllm_benchmark_result" /
|
|
||||||
f"latency_result_{time.time()}")
|
|
||||||
print(f"Profiling (results will be saved to '{profile_dir}')...")
|
print(f"Profiling (results will be saved to '{profile_dir}')...")
|
||||||
run_to_completion(profile_dir=profile_dir)
|
run_to_completion(profile_dir=profile_dir)
|
||||||
return
|
return
|
||||||
|
|||||||
@ -292,6 +292,8 @@ class Worker(WorkerBase):
|
|||||||
self.profiler.start()
|
self.profiler.start()
|
||||||
else:
|
else:
|
||||||
self.profiler.stop()
|
self.profiler.stop()
|
||||||
|
print(self.profiler.key_averages().table(
|
||||||
|
sort_by="self_cuda_time_total"))
|
||||||
|
|
||||||
def execute_dummy_batch(self) -> None:
|
def execute_dummy_batch(self) -> None:
|
||||||
self.model_runner._dummy_run(1)
|
self.model_runner._dummy_run(1)
|
||||||
|
|||||||
@ -128,6 +128,8 @@ class Worker(LocalOrDistributedWorkerBase):
|
|||||||
if self.profiler is None:
|
if self.profiler is None:
|
||||||
raise RuntimeError("Profiler is not enabled.")
|
raise RuntimeError("Profiler is not enabled.")
|
||||||
self.profiler.stop()
|
self.profiler.stop()
|
||||||
|
print(
|
||||||
|
self.profiler.key_averages().table(sort_by="self_cuda_time_total"))
|
||||||
|
|
||||||
def sleep(self, level: int = 1) -> None:
|
def sleep(self, level: int = 1) -> None:
|
||||||
free_bytes_before_sleep = torch.cuda.mem_get_info()[0]
|
free_bytes_before_sleep = torch.cuda.mem_get_info()[0]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user