diff --git a/vllm/compilation/monitor.py b/vllm/compilation/monitor.py
index c46721ab2d74..35658466d66d 100644
--- a/vllm/compilation/monitor.py
+++ b/vllm/compilation/monitor.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import os
 import time
 
 from vllm.config import CompilationConfig, CompilationLevel, VllmConfig
@@ -18,13 +17,12 @@ def start_monitoring_torch_compile(vllm_config: VllmConfig):
     torch_compile_start_time = time.time()
 
     compilation_config: CompilationConfig = vllm_config.compilation_config
-    if compilation_config.level == CompilationLevel.PIECEWISE and \
-            compilation_config.debug_dump_path:
+    path = vllm_config.compile_debug_dump_path()
+    if compilation_config.level == CompilationLevel.PIECEWISE and path:
         import depyf
-        path = os.path.join(compilation_config.debug_dump_path,
-                            f"rank_{vllm_config.parallel_config.rank}")
+        path.mkdir(parents=True, exist_ok=True)
         global context_manager
-        context_manager = depyf.prepare_debug(path)
+        context_manager = depyf.prepare_debug(path.as_posix())
         context_manager.__enter__()
 
 
diff --git a/vllm/compilation/vllm_inductor_pass.py b/vllm/compilation/vllm_inductor_pass.py
index 837770d18199..59019d74cb80 100644
--- a/vllm/compilation/vllm_inductor_pass.py
+++ b/vllm/compilation/vllm_inductor_pass.py
@@ -3,7 +3,6 @@
 import functools
 import operator
 import time
-from pathlib import Path
 from typing import ClassVar, Optional
 
 import regex as re
@@ -96,12 +95,10 @@ class VllmPatternMatcherPass(VllmInductorPass):
         TODO(luka): use pattern object to manually produce pattern graph
         """
-        debug_dump_path = config.compilation_config.debug_dump_path
+        debug_dump_path = config.compile_debug_dump_path()
         if not debug_dump_path:
             return
 
-        rank = config.parallel_config.rank
-        debug_dump_path = Path(debug_dump_path) / f"rank_{rank}"
         debug_dump_path.mkdir(parents=True, exist_ok=True)
 
         from vllm.utils import unique_filepath
 
diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py
index 930e4d27b410..062c9dc27017 100644
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -92,12 +92,11 @@ class TorchCompileWrapperWithCustomDispatcher:
             return
 
         self.compiled_codes.append(new_code)
-        debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path
-        if isinstance(debug_dump_dir, str) and debug_dump_dir != "":
-            rank = self.vllm_config.parallel_config.rank
-            decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}",
-                                           "transformed_code.py")
-            if not os.path.exists(decompiled_file):
+
+        path = self.vllm_config.compile_debug_dump_path()
+        if path:
+            decompiled_file = path / "transformed_code.py"
+            if not decompiled_file.exists():
                 try:
                     # usually the decompilation will succeed for most models,
                     # as we guarantee a full-graph compilation in Dynamo.
diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index ecea90988ebc..ccb91999d370 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -12,6 +12,7 @@ import textwrap
 from contextlib import contextmanager
 from dataclasses import field, fields, is_dataclass, replace
 from functools import cached_property, lru_cache
+from pathlib import Path
 from typing import (TYPE_CHECKING, Any, Literal, Optional, Protocol, TypeVar,
                     Union, cast)
 
@@ -541,6 +542,17 @@ class VllmConfig:
                 # local attention.
                 self.scheduler_config.disable_hybrid_kv_cache_manager = True
 
+        if self.compilation_config.debug_dump_path:
+            self.compilation_config.debug_dump_path = \
+                self.compilation_config.debug_dump_path.absolute().expanduser()
+        if envs.VLLM_DEBUG_DUMP_PATH is not None:
+            env_path = Path(envs.VLLM_DEBUG_DUMP_PATH).absolute().expanduser()
+            if self.compilation_config.debug_dump_path:
+                logger.warning(
+                    "Config-specified debug dump path is overridden"
+                    " by VLLM_DEBUG_DUMP_PATH to %s", env_path)
+            self.compilation_config.debug_dump_path = env_path
+
     def update_sizes_for_sequence_parallelism(self,
                                               possible_sizes: list) -> list:
         # remove the sizes that not multiple of tp_size when
@@ -672,6 +684,20 @@ class VllmConfig:
             f"but got '{self.load_config.load_format}'. "
             f"Model: {self.model_config.model}")
 
+    def compile_debug_dump_path(self) -> Optional[Path]:
+        """Returns a rank-aware path for dumping
+        torch.compile debug information.
+        """
+        if self.compilation_config.debug_dump_path is None:
+            return None
+        tp_rank = self.parallel_config.rank
+        dp_rank = self.parallel_config.data_parallel_rank
+        data_parallel_size = self.parallel_config.data_parallel_size
+        append_path = f"rank_{tp_rank}" if data_parallel_size == 1 \
+            else f"rank_{tp_rank}_dp_{dp_rank}"
+        path = self.compilation_config.debug_dump_path / append_path
+        return path
+
     def __str__(self):
         return (
             f"model={self.model_config.model!r}, "
diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 9735db98567d..825de7d26191 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -5,6 +5,7 @@ import enum
 import hashlib
 from collections import Counter
 from dataclasses import asdict, field
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union
 
 from pydantic import TypeAdapter, field_validator
@@ -169,7 +170,7 @@ class CompilationConfig:
     - 1: dynamo as is.
     - 2: dynamo once.
     - 3: piecewise compilation."""
-    debug_dump_path: str = ""
+    debug_dump_path: Optional[Path] = None
     """The path to dump the debug information."""
     cache_dir: str = ""
     """The directory to store the compiled graph, to accelerate Inductor
diff --git a/vllm/envs.py b/vllm/envs.py
index 94b0dece9655..854328044304 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -199,6 +199,7 @@ if TYPE_CHECKING:
     VLLM_DBO_COMM_SMS: int = 20
     GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = []
     VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None
+    VLLM_DEBUG_DUMP_PATH: Optional[str] = None
     VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE: bool = True
     VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING: bool = True
     VLLM_USE_NCCL_SYMM_MEM: bool = False
@@ -513,6 +514,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_PATTERN_MATCH_DEBUG":
    lambda: os.environ.get("VLLM_PATTERN_MATCH_DEBUG", None),
 
+    # Dump fx graphs to the given directory.
+    # It will override CompilationConfig.debug_dump_path if set.
+    "VLLM_DEBUG_DUMP_PATH":
+    lambda: os.environ.get("VLLM_DEBUG_DUMP_PATH", None),
+
     # local rank of the process in the distributed setting, used to determine
     # the GPU device id
     "LOCAL_RANK":
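
For reference, a standalone sketch of how the rank-aware path added in vllm/config/__init__.py resolves. The free-standing function and its parameters (base, tp_rank, dp_rank, data_parallel_size) are illustrative stand-ins for the fields the real method reads from parallel_config:

from pathlib import Path
from typing import Optional


def compile_debug_dump_path(base: Optional[Path], tp_rank: int, dp_rank: int,
                            data_parallel_size: int) -> Optional[Path]:
    # Mirrors VllmConfig.compile_debug_dump_path() from the diff above.
    if base is None:
        return None
    # The DP rank is only appended when data parallelism is in use.
    suffix = (f"rank_{tp_rank}" if data_parallel_size == 1
              else f"rank_{tp_rank}_dp_{dp_rank}")
    return base / suffix


print(compile_debug_dump_path(Path("/tmp/dump"), 0, 0, 1))  # /tmp/dump/rank_0
print(compile_debug_dump_path(Path("/tmp/dump"), 1, 2, 4))  # /tmp/dump/rank_1_dp_2

With data parallelism disabled the layout matches the old rank_{rank} scheme, so tooling that globs for rank_* directories keeps working; the _dp_{dp_rank} suffix only appears when data_parallel_size > 1.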
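
Similarly, a minimal sketch of the precedence introduced in VllmConfig.__post_init__: the VLLM_DEBUG_DUMP_PATH environment variable, when set, overrides any config-specified debug_dump_path. The resolve_debug_dump_path helper here is hypothetical, not vLLM API:

import os
from pathlib import Path
from typing import Optional


def resolve_debug_dump_path(config_value: Optional[Path]) -> Optional[Path]:
    # Hypothetical helper mirroring the __post_init__ logic in the diff.
    if config_value is not None:
        config_value = config_value.absolute().expanduser()
    env_value = os.environ.get("VLLM_DEBUG_DUMP_PATH")
    if env_value is not None:
        # The environment variable wins over the config value.
        return Path(env_value).absolute().expanduser()
    return config_value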