mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 07:44:55 +08:00
[MISC] Rename the torch profiler trace file to instance_id + rank_id so that the profiler results of each rank can be merged (#25867)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
parent
82e64c7a20
commit
76852017ea
@ -5,6 +5,7 @@ import copy
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import field, replace
|
||||
from functools import lru_cache
|
||||
@ -270,6 +271,9 @@ class VllmConfig:
|
||||
def __post_init__(self):
|
||||
"""Verify configs are valid & consistent with each other."""
|
||||
|
||||
# To give each torch profile run a unique instance name.
|
||||
self.instance_id = f"{time.time_ns()}"
|
||||
|
||||
self.try_verify_and_update_config()
|
||||
|
||||
if self.model_config is not None:
|
||||
|
||||
@ -79,6 +79,7 @@ class Worker(WorkerBase):
|
||||
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
||||
if envs.VLLM_TORCH_PROFILER_DIR:
|
||||
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
|
||||
worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
|
||||
logger.info(
|
||||
"Profiling enabled. Traces will be saved to: %s",
|
||||
torch_profiler_trace_dir,
|
||||
@ -101,7 +102,7 @@ class Worker(WorkerBase):
|
||||
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
|
||||
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
|
||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
||||
- torch_profiler_trace_dir, use_gzip=True
|
||||
+ torch_profiler_trace_dir, worker_name=worker_name, use_gzip=True
|
||||
),
|
||||
)
|
||||
else:
|
||||
|
||||
@ -39,6 +39,7 @@ class XPUWorker(Worker):
|
||||
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
||||
if envs.VLLM_TORCH_PROFILER_DIR:
|
||||
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
|
||||
worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
|
||||
logger.info(
|
||||
"Profiling enabled. Traces will be saved to: %s",
|
||||
torch_profiler_trace_dir,
|
||||
@ -61,7 +62,7 @@ class XPUWorker(Worker):
|
||||
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
|
||||
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
|
||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
||||
- torch_profiler_trace_dir, use_gzip=True
|
||||
+ torch_profiler_trace_dir, worker_name=worker_name, use_gzip=True
|
||||
),
|
||||
)
|
||||
else:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user