[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
Chenheli Hua 2025-08-19 19:32:47 -07:00 committed by GitHub
parent d46d417b58
commit e58c5a9768
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 35 additions and 4 deletions

View File

@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_LORA_RESOLVER_CACHE_DIR":
lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
# Enables torch profiler if set. Path to the directory where torch profiler
# traces are saved. Note that it must be an absolute path.
# Enables torch profiler if set.
# Both AsyncLLM's CPU traces as well as workers'
# traces (CPU & GPU) will be saved under this directory.
# Note that it must be an absolute path.
"VLLM_TORCH_PROFILER_DIR":
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),

View File

@ -1,12 +1,15 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import os
import socket
import time
from collections.abc import AsyncGenerator, Iterable, Mapping
from copy import copy
from typing import Any, Optional, Union
import numpy as np
import torch
import vllm.envs as envs
from vllm.config import ModelConfig, VllmConfig
@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
except RuntimeError:
pass
if envs.VLLM_TORCH_PROFILER_DIR:
logger.info(
"Torch profiler enabled. AsyncLLM CPU traces will be collected under %s", # noqa: E501
envs.VLLM_TORCH_PROFILER_DIR)
worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
self.profiler = torch.profiler.profile(
activities=[
torch.profiler.ProfilerActivity.CPU,
],
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
on_trace_ready=torch.profiler.tensorboard_trace_handler(
envs.VLLM_TORCH_PROFILER_DIR,
worker_name=worker_name,
use_gzip=True))
else:
logger.info(
"Torch profiler disabled. AsyncLLM CPU traces will not be collected." # noqa: E501
)
self.profiler = None
@classmethod
@deprecate_kwargs(
"disable_log_requests",
@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
raise self.dead_error
async def start_profile(self) -> None:
await self.engine_core.profile_async(True)
coros = [self.engine_core.profile_async(True)]
if self.profiler is not None:
coros.append(asyncio.to_thread(self.profiler.start))
await asyncio.gather(*coros)
async def stop_profile(self) -> None:
await self.engine_core.profile_async(False)
coros = [self.engine_core.profile_async(False)]
if self.profiler is not None:
coros.append(asyncio.to_thread(self.profiler.stop))
await asyncio.gather(*coros)
async def reset_mm_cache(self) -> None:
self.processor.mm_registry.reset_processor_cache(self.model_config)