[cold start] replace VLLM_COMPILE_DEPYF with debug_dump_dir (#20940)

Signed-off-by: Boyuan Feng <boyuan@meta.com>

commit 91b3d190ae (parent fc017915f5)
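With this change, dumping the Dynamo-transformed bytecode is controlled by CompilationConfig.debug_dump_path (with a per-rank subdirectory) rather than the VLLM_COMPILE_DEPYF environment variable. A minimal usage sketch, assuming the offline LLM entry point; the model name and dump directory are illustrative placeholders:

    # Hedged sketch: request the transformed-code dump via debug_dump_path.
    from vllm import LLM
    from vllm.config import CompilationConfig

    llm = LLM(
        model="facebook/opt-125m",
        compilation_config=CompilationConfig(debug_dump_path="/tmp/vllm_dump"),
    )
    # After Dynamo compiles the model, each rank writes its decompiled
    # bytecode to /tmp/vllm_dump/rank_<rank>/transformed_code.py
    # (see the wrapper diff below).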
vllm/compilation/wrapper.py

@@ -93,27 +93,19 @@ class TorchCompileWrapperWithCustomDispatcher:
             return

         self.compiled_codes.append(new_code)
-        local_cache_dir = self.vllm_config.compilation_config.local_cache_dir
-        if isinstance(local_cache_dir, str):
-            decompiled_file_name = ("transformed_code.py"
-                                    if envs.VLLM_COMPILE_DEPYF else
-                                    "transformed_code_README.txt")
-
-            decompiled_file = os.path.join(local_cache_dir,
-                                           decompiled_file_name)
+        debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path
+        if isinstance(debug_dump_dir, str) and debug_dump_dir != "":
+            rank = self.vllm_config.parallel_config.rank
+            decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}",
+                                           "transformed_code.py")
             if not os.path.exists(decompiled_file):
                 try:
                     # usually the decompilation will succeed for most models,
                     # as we guarantee a full-graph compilation in Dynamo.
                     # but there's no 100% guarantee, since decompilation is
                     # not a reversible process.
-                    if envs.VLLM_COMPILE_DEPYF:
-                        import depyf
-                        src = depyf.decompile(new_code)
-                    else:
-                        src = (
-                            "To get a transformed_code.py file, re-run with "
-                            "VLLM_COMPILE_DEPYF=1")
+                    import depyf
+                    src = depyf.decompile(new_code)

                     with open(decompiled_file, "w") as f:
                         f.write(src)
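For reference, depyf.decompile, which the wrapper now calls unconditionally, reconstructs approximate Python source from a code object; as the comment in the diff notes, this can still fail because decompilation is not a reversible process. A standalone sketch with an illustrative function (not from vLLM):

    import depyf

    def add(a: int, b: int) -> int:
        return a + b

    # Same call the wrapper makes on Dynamo's transformed bytecode:
    # turn a CodeType back into (approximate) Python source.
    src = depyf.decompile(add.__code__)
    print(src)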
vllm/envs.py

@@ -97,7 +97,6 @@ if TYPE_CHECKING:
     VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
     VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
     VLLM_DISABLE_COMPILE_CACHE: bool = False
-    VLLM_COMPILE_DEPYF: bool = False
     Q_SCALE_CONSTANT: int = 200
     K_SCALE_CONSTANT: int = 200
     V_SCALE_CONSTANT: int = 100
@@ -742,11 +741,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_DISABLE_COMPILE_CACHE":
     lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),

-    # If set, vllm will decompile the torch compiled code and dump to
-    # transformed_code.py. This is useful for debugging.
-    "VLLM_COMPILE_DEPYF":
-    lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
-
     # If set, vllm will run in development mode, which will enable
     # some additional endpoints for developing and debugging,
     # e.g. `/reset_prefix_cache`
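The envs.py hunks above follow vLLM's lazy environment-variable pattern: environment_variables maps each name to a zero-argument parser, resolved through a module-level __getattr__ (PEP 562). A condensed sketch of that pattern, trimmed to the one entry this diff keeps:

    import os
    from typing import Any, Callable

    # Each entry is a thunk, so the environment is read lazily on access.
    environment_variables: dict[str, Callable[[], Any]] = {
        "VLLM_DISABLE_COMPILE_CACHE":
        lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
    }

    def __getattr__(name: str) -> Any:
        # PEP 562: called when an attribute is not found in module globals,
        # so envs.VLLM_DISABLE_COMPILE_CACHE re-reads the environment
        # on every lookup.
        if name in environment_variables:
            return environment_variables[name]()
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")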