diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py
index 4fd00f0c75b0..8d5df1061eda 100644
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -93,27 +93,19 @@ class TorchCompileWrapperWithCustomDispatcher:
             return
 
         self.compiled_codes.append(new_code)
 
-        local_cache_dir = self.vllm_config.compilation_config.local_cache_dir
-        if isinstance(local_cache_dir, str):
-            decompiled_file_name = ("transformed_code.py"
-                                    if envs.VLLM_COMPILE_DEPYF else
-                                    "transformed_code_README.txt")
-
-            decompiled_file = os.path.join(local_cache_dir,
-                                           decompiled_file_name)
+        debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path
+        if isinstance(debug_dump_dir, str) and debug_dump_dir != "":
+            rank = self.vllm_config.parallel_config.rank
+            decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}",
+                                           "transformed_code.py")
             if not os.path.exists(decompiled_file):
                 try:
                     # usually the decompilation will succeed for most models,
                     # as we guarantee a full-graph compilation in Dynamo.
                     # but there's no 100% guarantee, since decompilation is
                     # not a reversible process.
-                    if envs.VLLM_COMPILE_DEPYF:
-                        import depyf
-                        src = depyf.decompile(new_code)
-                    else:
-                        src = (
-                            "To get a transformed_code.py file, re-run with "
-                            "VLLM_COMPILE_DEPYF=1")
+                    import depyf
+                    src = depyf.decompile(new_code)
                     with open(decompiled_file, "w") as f:
                         f.write(src)
diff --git a/vllm/envs.py b/vllm/envs.py
index 7fd5abed7002..7bff6ade8151 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -97,7 +97,6 @@ if TYPE_CHECKING:
     VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
     VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
     VLLM_DISABLE_COMPILE_CACHE: bool = False
-    VLLM_COMPILE_DEPYF: bool = False
     Q_SCALE_CONSTANT: int = 200
     K_SCALE_CONSTANT: int = 200
     V_SCALE_CONSTANT: int = 100
@@ -742,11 +741,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_DISABLE_COMPILE_CACHE":
     lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
 
-    # If set, vllm will decompile the torch compiled code and dump to
-    # transformed_code.py. This is useful for debugging.
-    "VLLM_COMPILE_DEPYF":
-    lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
-
     # If set, vllm will run in development mode, which will enable
     # some additional endpoints for developing and debugging,
     # e.g. `/reset_prefix_cache`
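
Note for reviewers: with this change, dumping the Dynamo-transformed bytecode no longer depends on the `VLLM_COMPILE_DEPYF` env var; it happens whenever `compilation_config.debug_dump_path` is set, producing one `transformed_code.py` per rank. Below is a minimal standalone sketch of the new dump logic. The `dump_transformed_code` helper, the `add` example function, and the `/tmp` path are illustrative only, not part of this diff; only `depyf.decompile` and the `debug_dump_path`/`rank_{rank}` layout come from the change above, and the real code in `TorchCompileWrapperWithCustomDispatcher` additionally swallows decompilation failures, since decompilation is not a reversible process.

```python
import os

import depyf  # third-party decompiler used by the wrapper


def dump_transformed_code(new_code, debug_dump_dir: str, rank: int) -> str:
    """Illustrative helper mirroring the new wrapper logic: decompile
    transformed bytecode and dump it under a per-rank debug directory."""
    target_dir = os.path.join(debug_dump_dir, f"rank_{rank}")
    # The wrapper assumes the dump directory already exists; created
    # here so the sketch is self-contained.
    os.makedirs(target_dir, exist_ok=True)
    decompiled_file = os.path.join(target_dir, "transformed_code.py")
    if not os.path.exists(decompiled_file):
        # Usually succeeds for full-graph Dynamo output, but there is
        # no 100% guarantee; the wrapper wraps this in try/except.
        src = depyf.decompile(new_code)
        with open(decompiled_file, "w") as f:
            f.write(src)
    return decompiled_file


def add(a, b):
    return a + b


# Any code object works for demonstration; the wrapper passes the
# bytecode produced by torch.compile's Dynamo frontend.
print(dump_transformed_code(add.__code__, "/tmp/vllm_debug_dump", rank=0))
```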