mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-26 13:03:06 +08:00
[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
parent
d46d417b58
commit
e58c5a9768
@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"VLLM_LORA_RESOLVER_CACHE_DIR":
|
||||
lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
|
||||
|
||||
# Enables torch profiler if set. Path to the directory where torch profiler
|
||||
# traces are saved. Note that it must be an absolute path.
|
||||
# Enables torch profiler if set.
|
||||
# Both AsyncLLM's CPU traces as well as workers'
|
||||
# traces (CPU & GPU) will be saved under this directory.
|
||||
# Note that it must be an absolute path.
|
||||
"VLLM_TORCH_PROFILER_DIR":
|
||||
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
|
||||
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
|
||||
|
||||
@ -1,12 +1,15 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import asyncio
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
from collections.abc import AsyncGenerator, Iterable, Mapping
|
||||
from copy import copy
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.config import ModelConfig, VllmConfig
|
||||
@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
# Optionally build a CPU-only torch profiler for the AsyncLLM process.
# VLLM_TORCH_PROFILER_DIR is the directory (absolute path) under which
# both this process's CPU traces and the workers' traces are saved;
# when it is unset, self.profiler stays None and start/stop_profile
# only toggle the worker-side profilers.
if envs.VLLM_TORCH_PROFILER_DIR:
    logger.info(
        "Torch profiler enabled. AsyncLLM CPU traces will be collected under %s",  # noqa: E501
        envs.VLLM_TORCH_PROFILER_DIR)
    # Unique trace name so this process's trace does not collide with
    # worker traces written to the same directory.
    worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
    self.profiler = torch.profiler.profile(
        activities=[
            # CPU only: this process does no GPU work of its own.
            torch.profiler.ProfilerActivity.CPU,
        ],
        with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
        on_trace_ready=torch.profiler.tensorboard_trace_handler(
            envs.VLLM_TORCH_PROFILER_DIR,
            worker_name=worker_name,
            use_gzip=True))
else:
    logger.info(
        "Torch profiler disabled. AsyncLLM CPU traces will not be collected."  # noqa: E501
    )
    self.profiler = None
|
||||
|
||||
@classmethod
|
||||
@deprecate_kwargs(
|
||||
"disable_log_requests",
|
||||
@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
|
||||
raise self.dead_error
|
||||
|
||||
async def start_profile(self) -> None:
    """Start profiling.

    Always enables profiling on the engine-core workers via
    ``engine_core.profile_async(True)``; additionally starts the local
    AsyncLLM torch profiler when one was configured at construction
    time (``self.profiler`` is not ``None``).
    """
    coros = [self.engine_core.profile_async(True)]
    if self.profiler is not None:
        # torch profiler start() can block; run it in a worker thread
        # so the event loop is not stalled.
        coros.append(asyncio.to_thread(self.profiler.start))
    await asyncio.gather(*coros)
|
||||
|
||||
async def stop_profile(self) -> None:
    """Stop profiling.

    Mirror of :meth:`start_profile`: always disables profiling on the
    engine-core workers via ``engine_core.profile_async(False)``, and
    stops the local AsyncLLM torch profiler when one is configured
    (``self.profiler`` is not ``None``).
    """
    coros = [self.engine_core.profile_async(False)]
    if self.profiler is not None:
        # stop() flushes/exports the trace and can block; keep it off
        # the event loop.
        coros.append(asyncio.to_thread(self.profiler.stop))
    await asyncio.gather(*coros)
|
||||
|
||||
async def reset_mm_cache(self) -> None:
|
||||
self.processor.mm_registry.reset_processor_cache(self.model_config)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user