Add VLLM_NVTX_SCOPES_FOR_PROFILING=1 to enable nvtx.annotate scopes (#25501)

Signed-off-by: Corey Lowman <clowman1993@gmail.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Corey Lowman 2025-09-23 18:50:09 -04:00 committed by yewentao256
parent 7a8f0a3548
commit 4ebc513fc1
2 changed files with 22 additions and 3 deletions

View File

@ -187,6 +187,7 @@ if TYPE_CHECKING:
VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False
VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True
VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER"
VLLM_DEEPEP_BUFFER_SIZE_MB: int = 1024
@ -1387,6 +1388,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_CUSTOM_SCOPES_FOR_PROFILING":
lambda: bool(int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))),
# Add optional NVTX scopes for profiling; leave disabled to avoid overhead
"VLLM_NVTX_SCOPES_FOR_PROFILING":
lambda: bool(int(os.getenv("VLLM_NVTX_SCOPES_FOR_PROFILING", "0"))),
# Represent block hashes in KV cache events as 64-bit integers instead of
# raw bytes. Defaults to True for backward compatibility.
"VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES":

View File

@ -375,8 +375,22 @@ def report_usage_stats(
})
# Cached scope factory used by record_function_or_nullcontext(). It stays None
# until the first call, which resolves it from the environment flags.
_PROFILER_FUNC = None


def record_function_or_nullcontext(name: str) -> AbstractContextManager:
    """Return a context manager that marks the scope ``name`` for profiling.

    The factory used to build the context manager is chosen on the first
    call and cached in the module-level ``_PROFILER_FUNC``:

    * ``record_function`` when ``envs.VLLM_CUSTOM_SCOPES_FOR_PROFILING`` is
      truthy;
    * ``nvtx.annotate`` when ``envs.VLLM_NVTX_SCOPES_FOR_PROFILING`` is
      truthy (checked only if the custom-scopes flag is off);
    * ``contextlib.nullcontext`` otherwise.

    Because the choice is cached, the two flags are consulted only once;
    changing them after the first call has no effect on later calls.

    Args:
        name: Label passed to the selected factory. With the
            ``contextlib.nullcontext`` fallback it becomes the value bound
            by ``with ... as``.

    Returns:
        The object produced by calling the selected factory with ``name``.
    """
    global _PROFILER_FUNC

    # Fast path: an earlier call already resolved the factory.
    if _PROFILER_FUNC is not None:
        return _PROFILER_FUNC(name)

    func = contextlib.nullcontext
    if envs.VLLM_CUSTOM_SCOPES_FOR_PROFILING:
        func = record_function
    elif envs.VLLM_NVTX_SCOPES_FOR_PROFILING:
        # Imported lazily so nvtx is only needed when this flag is enabled.
        import nvtx
        func = nvtx.annotate

    _PROFILER_FUNC = func
    return func(name)