From c1c8ca57ff53a559a9bdca1ded40960f806c7505 Mon Sep 17 00:00:00 2001
From: Boyuan Feng
Date: Fri, 11 Jul 2025 23:06:13 -0700
Subject: [PATCH] [cold start time] add envs.VLLM_COMPILE_DEPYF to guard
 decompile (#20790)

Signed-off-by: Boyuan Feng
---
 vllm/compilation/wrapper.py | 16 +++++++++++++---
 vllm/envs.py                |  6 ++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py
index 2a261c84c3fc3..4fd00f0c75b05 100644
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -95,16 +95,26 @@ class TorchCompileWrapperWithCustomDispatcher:
         self.compiled_codes.append(new_code)
         local_cache_dir = self.vllm_config.compilation_config.local_cache_dir
         if isinstance(local_cache_dir, str):
+            decompiled_file_name = ("transformed_code.py"
+                                    if envs.VLLM_COMPILE_DEPYF else
+                                    "transformed_code_README.txt")
+
             decompiled_file = os.path.join(local_cache_dir,
-                                           "transformed_code.py")
+                                           decompiled_file_name)
             if not os.path.exists(decompiled_file):
                 try:
                     # usually the decompilation will succeed for most models,
                     # as we guarantee a full-graph compilation in Dynamo.
                     # but there's no 100% guarantee, since decompliation is
                     # not a reversible process.
-                    import depyf
-                    src = depyf.decompile(new_code)
+                    if envs.VLLM_COMPILE_DEPYF:
+                        import depyf
+                        src = depyf.decompile(new_code)
+                    else:
+                        src = (
+                            "To get a transformed_code.py file, re-run with "
+                            "VLLM_COMPILE_DEPYF=1")
+
                     with open(decompiled_file, "w") as f:
                         f.write(src)
 
diff --git a/vllm/envs.py b/vllm/envs.py
index 7bff6ade81512..7fd5abed7002f 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -97,6 +97,7 @@ if TYPE_CHECKING:
     VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
     VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
     VLLM_DISABLE_COMPILE_CACHE: bool = False
+    VLLM_COMPILE_DEPYF: bool = False
     Q_SCALE_CONSTANT: int = 200
     K_SCALE_CONSTANT: int = 200
     V_SCALE_CONSTANT: int = 100
@@ -741,6 +742,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_DISABLE_COMPILE_CACHE":
     lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
 
+    # If set, vllm will decompile the torch compiled code and dump to
+    # transformed_code.py. This is useful for debugging.
+    "VLLM_COMPILE_DEPYF":
+    lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
+
     # If set, vllm will run in development mode, which will enable
     # some additional endpoints for developing and debugging,
     # e.g. `/reset_prefix_cache`
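
The following is a minimal standalone sketch of the guarded-decompile pattern the patch introduces in vllm/compilation/wrapper.py. The helper name dump_transformed_code, the /tmp cache directory, and the toy function are illustrative placeholders only; the VLLM_COMPILE_DEPYF gating, the file names, and the depyf.decompile call mirror the diff above.

# Minimal sketch of the guarded decompile path. dump_transformed_code and
# the /tmp cache directory are illustrative placeholders; only the
# VLLM_COMPILE_DEPYF gating and the depyf.decompile call follow the diff.
import os
from types import CodeType

VLLM_COMPILE_DEPYF = bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0")))


def dump_transformed_code(new_code: CodeType, local_cache_dir: str) -> str:
    # Decompilation is optional: by default only a short README is written,
    # telling the user how to get the real transformed_code.py.
    decompiled_file_name = ("transformed_code.py"
                            if VLLM_COMPILE_DEPYF else
                            "transformed_code_README.txt")
    decompiled_file = os.path.join(local_cache_dir, decompiled_file_name)
    if not os.path.exists(decompiled_file):
        if VLLM_COMPILE_DEPYF:
            # Import lazily so the dependency and the (possibly slow)
            # decompilation are only paid for when explicitly requested.
            import depyf
            src = depyf.decompile(new_code)
        else:
            src = ("To get a transformed_code.py file, re-run with "
                   "VLLM_COMPILE_DEPYF=1")
        with open(decompiled_file, "w") as f:
            f.write(src)
    return decompiled_file


if __name__ == "__main__":
    # Toy usage: dump (or stub out) this function's bytecode.
    def f(x):
        return x + 1

    print(dump_transformed_code(f.__code__, "/tmp"))

With VLLM_COMPILE_DEPYF unset (the default), only the short README text is written, so the potentially slow decompilation is skipped during cold start; re-running with VLLM_COMPILE_DEPYF=1 produces transformed_code.py as before.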
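A quick sketch of how the new vllm/envs.py entry behaves: each entry in environment_variables is a zero-argument lambda, and bool(int(os.getenv(name, "0"))) maps an unset or "0" value to False and "1" to True. The dict below is a standalone copy for illustration, not vLLM's actual envs module.

# Standalone illustration of the envs.py parsing pattern; this dict is a
# local copy, not vLLM's envs module.
import os
from typing import Any, Callable

environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_COMPILE_DEPYF":
    lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
}

os.environ.pop("VLLM_COMPILE_DEPYF", None)
print(environment_variables["VLLM_COMPILE_DEPYF"]())  # False: unset defaults to "0"
os.environ["VLLM_COMPILE_DEPYF"] = "1"
print(environment_variables["VLLM_COMPILE_DEPYF"]())  # True

Note that the flag expects an integer string: a value such as "true" would raise a ValueError inside int().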