diff --git a/vllm/compilation/monitor.py b/vllm/compilation/monitor.py
index c46721ab2d74..35658466d66d 100644
--- a/vllm/compilation/monitor.py
+++ b/vllm/compilation/monitor.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import os
 import time
 
 from vllm.config import CompilationConfig, CompilationLevel, VllmConfig
@@ -18,13 +17,12 @@ def start_monitoring_torch_compile(vllm_config: VllmConfig):
     torch_compile_start_time = time.time()
 
     compilation_config: CompilationConfig = vllm_config.compilation_config
-    if compilation_config.level == CompilationLevel.PIECEWISE and \
-            compilation_config.debug_dump_path:
+    path = vllm_config.compile_debug_dump_path()
+    if compilation_config.level == CompilationLevel.PIECEWISE and path:
         import depyf
-        path = os.path.join(compilation_config.debug_dump_path,
-                            f"rank_{vllm_config.parallel_config.rank}")
+        path.mkdir(parents=True, exist_ok=True)
         global context_manager
-        context_manager = depyf.prepare_debug(path)
+        context_manager = depyf.prepare_debug(path.as_posix())
         context_manager.__enter__()
 
 
diff --git a/vllm/compilation/vllm_inductor_pass.py b/vllm/compilation/vllm_inductor_pass.py
index 837770d18199..59019d74cb80 100644
--- a/vllm/compilation/vllm_inductor_pass.py
+++ b/vllm/compilation/vllm_inductor_pass.py
@@ -3,7 +3,6 @@
 import functools
 import operator
 import time
-from pathlib import Path
 from typing import ClassVar, Optional
 
 import regex as re
@@ -96,12 +95,10 @@ class VllmPatternMatcherPass(VllmInductorPass):
         TODO(luka): use pattern object to manually produce pattern graph
         """
-        debug_dump_path = config.compilation_config.debug_dump_path
+        debug_dump_path = config.compile_debug_dump_path()
         if not debug_dump_path:
             return
 
-        rank = config.parallel_config.rank
-        debug_dump_path = Path(debug_dump_path) / f"rank_{rank}"
         debug_dump_path.mkdir(parents=True, exist_ok=True)
 
         from vllm.utils import unique_filepath
 
diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py
index 930e4d27b410..062c9dc27017 100644
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -92,12 +92,11 @@ class TorchCompileWrapperWithCustomDispatcher:
             return
 
         self.compiled_codes.append(new_code)
-        debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path
-        if isinstance(debug_dump_dir, str) and debug_dump_dir != "":
-            rank = self.vllm_config.parallel_config.rank
-            decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}",
-                                           "transformed_code.py")
-            if not os.path.exists(decompiled_file):
+
+        path = self.vllm_config.compile_debug_dump_path()
+        if path:
+            decompiled_file = path / "transformed_code.py"
+            if not decompiled_file.exists():
                 try:
                     # usually the decompilation will succeed for most models,
                     # as we guarantee a full-graph compilation in Dynamo.
diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index ecea90988ebc..ccb91999d370 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -12,6 +12,7 @@ import textwrap
 from contextlib import contextmanager
 from dataclasses import field, fields, is_dataclass, replace
 from functools import cached_property, lru_cache
+from pathlib import Path
 from typing import (TYPE_CHECKING, Any, Literal, Optional, Protocol, TypeVar,
                     Union, cast)
 
@@ -541,6 +542,17 @@ class VllmConfig:
                 # local attention.
                 self.scheduler_config.disable_hybrid_kv_cache_manager = True
 
+        if self.compilation_config.debug_dump_path:
+            self.compilation_config.debug_dump_path = \
+                self.compilation_config.debug_dump_path.absolute().expanduser()
+        if envs.VLLM_DEBUG_DUMP_PATH is not None:
+            env_path = Path(envs.VLLM_DEBUG_DUMP_PATH).absolute().expanduser()
+            if self.compilation_config.debug_dump_path:
+                logger.warning(
+                    "Config-specified debug dump path is overridden"
+                    " by VLLM_DEBUG_DUMP_PATH to %s", env_path)
+            self.compilation_config.debug_dump_path = env_path
+
     def update_sizes_for_sequence_parallelism(self,
                                               possible_sizes: list) -> list:
         # remove the sizes that not multiple of tp_size when
@@ -672,6 +684,20 @@ class VllmConfig:
             f"but got '{self.load_config.load_format}'. "
             f"Model: {self.model_config.model}")
 
+    def compile_debug_dump_path(self) -> Optional[Path]:
+        """Returns a rank-aware path for dumping
+        torch.compile debug information.
+        """
+        if self.compilation_config.debug_dump_path is None:
+            return None
+        tp_rank = self.parallel_config.rank
+        dp_rank = self.parallel_config.data_parallel_rank
+        data_parallel_size = self.parallel_config.data_parallel_size
+        append_path = f"rank_{tp_rank}" if data_parallel_size == 1 \
+            else f"rank_{tp_rank}_dp_{dp_rank}"
+        path = self.compilation_config.debug_dump_path / append_path
+        return path
+
     def __str__(self):
         return (
             f"model={self.model_config.model!r}, "
diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 9735db98567d..825de7d26191 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -5,6 +5,7 @@ import enum
 import hashlib
 from collections import Counter
 from dataclasses import asdict, field
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union
 
 from pydantic import TypeAdapter, field_validator
@@ -169,7 +170,7 @@ class CompilationConfig:
     - 1: dynamo as is.
     - 2: dynamo once.
     - 3: piecewise compilation."""
-    debug_dump_path: str = ""
+    debug_dump_path: Optional[Path] = None
     """The path to dump the debug information."""
     cache_dir: str = ""
     """The directory to store the compiled graph, to accelerate Inductor
diff --git a/vllm/envs.py b/vllm/envs.py
index 94b0dece9655..854328044304 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -199,6 +199,7 @@ if TYPE_CHECKING:
     VLLM_DBO_COMM_SMS: int = 20
     GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = []
     VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None
+    VLLM_DEBUG_DUMP_PATH: Optional[str] = None
     VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE: bool = True
     VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING: bool = True
     VLLM_USE_NCCL_SYMM_MEM: bool = False
@@ -513,6 +514,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_PATTERN_MATCH_DEBUG":
    lambda: os.environ.get("VLLM_PATTERN_MATCH_DEBUG", None),
 
+    # Dump fx graphs to the given directory.
+    # It will override CompilationConfig.debug_dump_path if set.
+    "VLLM_DEBUG_DUMP_PATH":
+    lambda: os.environ.get("VLLM_DEBUG_DUMP_PATH", None),
+
     # local rank of the process in the distributed setting, used to determine
     # the GPU device id
     "LOCAL_RANK":
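
For reference, a standalone sketch of how the rank-aware path added in vllm/config/__init__.py resolves. The free-standing function and its parameters (base, tp_rank, dp_rank, data_parallel_size) are illustrative stand-ins for the fields the real method reads from parallel_config:

from pathlib import Path
from typing import Optional


def compile_debug_dump_path(base: Optional[Path], tp_rank: int, dp_rank: int,
                            data_parallel_size: int) -> Optional[Path]:
    # Mirrors VllmConfig.compile_debug_dump_path() from the diff above.
    if base is None:
        return None
    # The DP rank is only appended when data parallelism is in use.
    suffix = (f"rank_{tp_rank}" if data_parallel_size == 1
              else f"rank_{tp_rank}_dp_{dp_rank}")
    return base / suffix


print(compile_debug_dump_path(Path("/tmp/dump"), 0, 0, 1))  # /tmp/dump/rank_0
print(compile_debug_dump_path(Path("/tmp/dump"), 1, 2, 4))  # /tmp/dump/rank_1_dp_2

With data parallelism disabled the layout matches the old rank_{rank} scheme, so tooling that globs for rank_* directories keeps working; the _dp_{dp_rank} suffix only appears when data_parallel_size > 1.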
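
Similarly, a minimal sketch of the precedence introduced in VllmConfig.__post_init__: the VLLM_DEBUG_DUMP_PATH environment variable, when set, overrides any config-specified debug_dump_path. The resolve_debug_dump_path helper here is hypothetical, not vLLM API:

import os
from pathlib import Path
from typing import Optional


def resolve_debug_dump_path(config_value: Optional[Path]) -> Optional[Path]:
    # Hypothetical helper mirroring the __post_init__ logic in the diff.
    if config_value is not None:
        config_value = config_value.absolute().expanduser()
    env_value = os.environ.get("VLLM_DEBUG_DUMP_PATH")
    if env_value is not None:
        # The environment variable wins over the config value.
        return Path(env_value).absolute().expanduser()
    return config_value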