From c1c8ca57ff53a559a9bdca1ded40960f806c7505 Mon Sep 17 00:00:00 2001
From: Boyuan Feng
Date: Fri, 11 Jul 2025 23:06:13 -0700
Subject: [PATCH] [cold start time] add envs.VLLM_COMPILE_DEPYF to guard
 decompile (#20790)

Signed-off-by: Boyuan Feng
---
 vllm/compilation/wrapper.py | 16 +++++++++++++---
 vllm/envs.py                |  6 ++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py
index 2a261c84c3fc3..4fd00f0c75b05 100644
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -95,16 +95,26 @@ class TorchCompileWrapperWithCustomDispatcher:
         self.compiled_codes.append(new_code)
         local_cache_dir = self.vllm_config.compilation_config.local_cache_dir
         if isinstance(local_cache_dir, str):
+            decompiled_file_name = ("transformed_code.py"
+                                    if envs.VLLM_COMPILE_DEPYF else
+                                    "transformed_code_README.txt")
+
             decompiled_file = os.path.join(local_cache_dir,
-                                           "transformed_code.py")
+                                           decompiled_file_name)
             if not os.path.exists(decompiled_file):
                 try:
                     # usually the decompilation will succeed for most models,
                     # as we guarantee a full-graph compilation in Dynamo.
                     # but there's no 100% guarantee, since decompliation is
                     # not a reversible process.
-                    import depyf
-                    src = depyf.decompile(new_code)
+                    if envs.VLLM_COMPILE_DEPYF:
+                        import depyf
+                        src = depyf.decompile(new_code)
+                    else:
+                        src = (
+                            "To get a transformed_code.py file, re-run with "
+                            "VLLM_COMPILE_DEPYF=1")
+
                     with open(decompiled_file, "w") as f:
                         f.write(src)
 
diff --git a/vllm/envs.py b/vllm/envs.py
index 7bff6ade81512..7fd5abed7002f 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -97,6 +97,7 @@ if TYPE_CHECKING:
     VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
     VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
     VLLM_DISABLE_COMPILE_CACHE: bool = False
+    VLLM_COMPILE_DEPYF: bool = False
     Q_SCALE_CONSTANT: int = 200
     K_SCALE_CONSTANT: int = 200
     V_SCALE_CONSTANT: int = 100
@@ -741,6 +742,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_DISABLE_COMPILE_CACHE":
     lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
 
+    # If set, vllm will decompile the torch compiled code and dump to
+    # transformed_code.py. This is useful for debugging.
+    "VLLM_COMPILE_DEPYF":
+    lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
+
     # If set, vllm will run in development mode, which will enable
     # some additional endpoints for developing and debugging,
     # e.g. `/reset_prefix_cache`
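
The following is a minimal standalone sketch of the guarded-decompile pattern the patch introduces in vllm/compilation/wrapper.py. The helper name dump_transformed_code, the /tmp cache directory, and the toy function are illustrative placeholders only; the VLLM_COMPILE_DEPYF gating, the file names, and the depyf.decompile call mirror the diff above.

# Minimal sketch of the guarded decompile path. dump_transformed_code and
# the /tmp cache directory are illustrative placeholders; only the
# VLLM_COMPILE_DEPYF gating and the depyf.decompile call follow the diff.
import os
from types import CodeType

VLLM_COMPILE_DEPYF = bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0")))


def dump_transformed_code(new_code: CodeType, local_cache_dir: str) -> str:
    # Decompilation is optional: by default only a short README is written,
    # telling the user how to get the real transformed_code.py.
    decompiled_file_name = ("transformed_code.py"
                            if VLLM_COMPILE_DEPYF else
                            "transformed_code_README.txt")
    decompiled_file = os.path.join(local_cache_dir, decompiled_file_name)
    if not os.path.exists(decompiled_file):
        if VLLM_COMPILE_DEPYF:
            # Import lazily so the dependency and the (possibly slow)
            # decompilation are only paid for when explicitly requested.
            import depyf
            src = depyf.decompile(new_code)
        else:
            src = ("To get a transformed_code.py file, re-run with "
                   "VLLM_COMPILE_DEPYF=1")
        with open(decompiled_file, "w") as f:
            f.write(src)
    return decompiled_file


if __name__ == "__main__":
    # Toy usage: dump (or stub out) this function's bytecode.
    def f(x):
        return x + 1

    print(dump_transformed_code(f.__code__, "/tmp"))

With VLLM_COMPILE_DEPYF unset (the default), only the short README text is written, so the potentially slow decompilation is skipped during cold start; re-running with VLLM_COMPILE_DEPYF=1 produces transformed_code.py as before.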
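A quick sketch of how the new vllm/envs.py entry behaves: each entry in environment_variables is a zero-argument lambda, and bool(int(os.getenv(name, "0"))) maps an unset or "0" value to False and "1" to True. The dict below is a standalone copy for illustration, not vLLM's actual envs module.

# Standalone illustration of the envs.py parsing pattern; this dict is a
# local copy, not vLLM's envs module.
import os
from typing import Any, Callable

environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_COMPILE_DEPYF":
    lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
}

os.environ.pop("VLLM_COMPILE_DEPYF", None)
print(environment_variables["VLLM_COMPILE_DEPYF"]())  # False: unset defaults to "0"
os.environ["VLLM_COMPILE_DEPYF"] = "1"
print(environment_variables["VLLM_COMPILE_DEPYF"]())  # True

Note that the flag expects an integer string: a value such as "true" would raise a ValueError inside int().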