Add VLLM_NVTX_SCOPES_FOR_PROFILING=1 to enable nvtx.annotate scopes (#25501)

Signed-off-by: Corey Lowman <clowman1993@gmail.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
2026-01-06 10:35:18 +08:00 · 2025-09-23 18:50:09 -04:00 · 2025-09-23 18:50:09 -04:00 · 4ebc513fc1
commit 4ebc513fc1
parent 7a8f0a3548
2 changed files with 22 additions and 3 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -187,6 +187,7 @@ if TYPE_CHECKING:
    VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False
    VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
    VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
+    VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
    VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True
    VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER"
    VLLM_DEEPEP_BUFFER_SIZE_MB: int = 1024
@@ -1387,6 +1388,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_CUSTOM_SCOPES_FOR_PROFILING":
    lambda: bool(int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))),

+    # Add optional NVTX scopes for profiling; disabled by default to avoid overhead.
+    "VLLM_NVTX_SCOPES_FOR_PROFILING":
+    lambda: bool(int(os.getenv("VLLM_NVTX_SCOPES_FOR_PROFILING", "0"))),
+
    # Represent block hashes in KV cache events as 64-bit integers instead of
    # raw bytes. Defaults to True for backward compatibility.
    "VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES":
--- a/vllm/v1/utils.py
+++ b/vllm/v1/utils.py
@@ -375,8 +375,22 @@ def report_usage_stats(
        })


+_PROFILER_FUNC = None
+
+
 def record_function_or_nullcontext(name: str) -> AbstractContextManager:
+    global _PROFILER_FUNC
+
+    # Fast path: reuse the factory resolved and cached by an earlier call.
+    if _PROFILER_FUNC is not None:
+        return _PROFILER_FUNC(name)
+
+    func = contextlib.nullcontext
    if envs.VLLM_CUSTOM_SCOPES_FOR_PROFILING:
-        return record_function(name)
-    else:
-        return contextlib.nullcontext()
+        func = record_function
+    elif envs.VLLM_NVTX_SCOPES_FOR_PROFILING:
+        import nvtx
+        func = nvtx.annotate
+
+    _PROFILER_FUNC = func
+    return func(name)