mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 19:14:57 +08:00
[MISC] Rename the torch profiler trace file to instance_id+rank_id so that the profiler results of each rank can be merged (#25867)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
parent
82e64c7a20
commit
76852017ea
@ -5,6 +5,7 @@ import copy
|
|||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from dataclasses import field, replace
|
from dataclasses import field, replace
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
@ -270,6 +271,9 @@ class VllmConfig:
|
|||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
"""Verify configs are valid & consistent with each other."""
|
"""Verify configs are valid & consistent with each other."""
|
||||||
|
|
||||||
|
# To give each torch profile run a unique instance name.
|
||||||
|
self.instance_id = f"{time.time_ns()}"
|
||||||
|
|
||||||
self.try_verify_and_update_config()
|
self.try_verify_and_update_config()
|
||||||
|
|
||||||
if self.model_config is not None:
|
if self.model_config is not None:
|
||||||
|
|||||||
@ -79,6 +79,7 @@ class Worker(WorkerBase):
|
|||||||
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
||||||
if envs.VLLM_TORCH_PROFILER_DIR:
|
if envs.VLLM_TORCH_PROFILER_DIR:
|
||||||
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
|
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
|
||||||
|
worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
|
||||||
logger.info(
|
logger.info(
|
||||||
"Profiling enabled. Traces will be saved to: %s",
|
"Profiling enabled. Traces will be saved to: %s",
|
||||||
torch_profiler_trace_dir,
|
torch_profiler_trace_dir,
|
||||||
@ -101,7 +102,7 @@ class Worker(WorkerBase):
|
|||||||
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
|
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
|
||||||
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
|
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
|
||||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
||||||
torch_profiler_trace_dir, use_gzip=True
|
torch_profiler_trace_dir, worker_name=worker_name, use_gzip=True
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -39,6 +39,7 @@ class XPUWorker(Worker):
|
|||||||
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
||||||
if envs.VLLM_TORCH_PROFILER_DIR:
|
if envs.VLLM_TORCH_PROFILER_DIR:
|
||||||
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
|
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
|
||||||
|
worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
|
||||||
logger.info(
|
logger.info(
|
||||||
"Profiling enabled. Traces will be saved to: %s",
|
"Profiling enabled. Traces will be saved to: %s",
|
||||||
torch_profiler_trace_dir,
|
torch_profiler_trace_dir,
|
||||||
@ -61,7 +62,7 @@ class XPUWorker(Worker):
|
|||||||
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
|
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
|
||||||
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
|
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
|
||||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
||||||
torch_profiler_trace_dir, use_gzip=True
|
torch_profiler_trace_dir, worker_name=worker_name, use_gzip=True
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user