mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-26 13:03:06 +08:00
[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
parent
d46d417b58
commit
e58c5a9768
@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"VLLM_LORA_RESOLVER_CACHE_DIR":
|
||||
lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
|
||||
|
||||
# Enables torch profiler if set. Path to the directory where torch profiler
|
||||
# traces are saved. Note that it must be an absolute path.
|
||||
# Enables torch profiler if set.
|
||||
# Both AsyncLLM's CPU traces as well as workers'
|
||||
# traces (CPU & GPU) will be saved under this directory.
|
||||
# Note that it must be an absolute path.
|
||||
"VLLM_TORCH_PROFILER_DIR":
|
||||
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
|
||||
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
|
||||
|
||||
@ -1,12 +1,15 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import asyncio
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
from collections.abc import AsyncGenerator, Iterable, Mapping
|
||||
from copy import copy
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.config import ModelConfig, VllmConfig
|
||||
@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
# Optionally build a CPU-only torch profiler for the AsyncLLM process.
# VLLM_TORCH_PROFILER_DIR is the directory (absolute path) under which
# both this process's CPU traces and the workers' traces are saved;
# when it is unset, self.profiler stays None and start/stop_profile
# only toggle the worker-side profilers.
if envs.VLLM_TORCH_PROFILER_DIR:
    logger.info(
        "Torch profiler enabled. AsyncLLM CPU traces will be collected under %s",  # noqa: E501
        envs.VLLM_TORCH_PROFILER_DIR)
    # Unique trace name so this process's trace does not collide with
    # worker traces written to the same directory.
    worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
    self.profiler = torch.profiler.profile(
        activities=[
            # CPU only: this process does no GPU work of its own.
            torch.profiler.ProfilerActivity.CPU,
        ],
        with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
        on_trace_ready=torch.profiler.tensorboard_trace_handler(
            envs.VLLM_TORCH_PROFILER_DIR,
            worker_name=worker_name,
            use_gzip=True))
else:
    logger.info(
        "Torch profiler disabled. AsyncLLM CPU traces will not be collected."  # noqa: E501
    )
    self.profiler = None
|
||||
|
||||
@classmethod
|
||||
@deprecate_kwargs(
|
||||
"disable_log_requests",
|
||||
@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
|
||||
raise self.dead_error
|
||||
|
||||
async def start_profile(self) -> None:
    """Start profiling.

    Always enables profiling on the engine-core workers via
    ``engine_core.profile_async(True)``; additionally starts the local
    AsyncLLM torch profiler when one was configured at construction
    time (``self.profiler`` is not ``None``).
    """
    coros = [self.engine_core.profile_async(True)]
    if self.profiler is not None:
        # torch profiler start() can block; run it in a worker thread
        # so the event loop is not stalled.
        coros.append(asyncio.to_thread(self.profiler.start))
    await asyncio.gather(*coros)
|
||||
|
||||
async def stop_profile(self) -> None:
    """Stop profiling.

    Mirror of :meth:`start_profile`: always disables profiling on the
    engine-core workers via ``engine_core.profile_async(False)``, and
    stops the local AsyncLLM torch profiler when one is configured
    (``self.profiler`` is not ``None``).
    """
    coros = [self.engine_core.profile_async(False)]
    if self.profiler is not None:
        # stop() flushes/exports the trace and can block; keep it off
        # the event loop.
        coros.append(asyncio.to_thread(self.profiler.stop))
    await asyncio.gather(*coros)
|
||||
|
||||
async def reset_mm_cache(self) -> None:
|
||||
self.processor.mm_registry.reset_processor_cache(self.model_config)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user