From a521ef06e5eb18a34d665282fa38c4768a855bb8 Mon Sep 17 00:00:00 2001 From: Richard Zou Date: Thu, 29 May 2025 18:41:58 -0400 Subject: [PATCH] Use standalone_compile by default in torch >= 2.8.0 (#18846) Signed-off-by: rzou --- vllm/compilation/backends.py | 5 +++-- vllm/compilation/compiler_interface.py | 2 +- vllm/envs.py | 14 ++++++++------ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 0358c9d0d1b5..b724479a95de 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -16,7 +16,7 @@ import vllm.envs as envs from vllm.config import CompilationConfig, VllmConfig from vllm.logger import init_logger from vllm.platforms import current_platform -from vllm.utils import resolve_obj_by_qualname +from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname from .compiler_interface import (CompilerInterface, EagerAdaptor, InductorAdaptor, InductorStandaloneAdaptor) @@ -29,7 +29,8 @@ logger = init_logger(__name__) def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface: if compilation_config.use_inductor: - if envs.VLLM_TEST_STANDALONE_COMPILE: + if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer( + "2.8.0"): logger.info("Using InductorStandaloneAdaptor") return InductorStandaloneAdaptor() else: diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index 7e9186f8613c..8fa8ce279deb 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface): This is not on by default yet, but we plan to turn it on by default for PyTorch 2.8. - Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off. + Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off. 
""" name = "inductor_standalone" diff --git a/vllm/envs.py b/vllm/envs.py index bd9104afa4aa..785fe73098be 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -143,10 +143,10 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]: def get_vllm_port() -> Optional[int]: """Get the port from VLLM_PORT environment variable. - + Returns: The port number as an integer if VLLM_PORT is set, None otherwise. - + Raises: ValueError: If VLLM_PORT is a URI, suggest k8s service discovery issue. """ @@ -308,9 +308,11 @@ environment_variables: dict[str, Callable[[], Any]] = { lambda: bool( os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"), - # Internal flag to enable/disable Inductor standalone compile - "VLLM_TEST_STANDALONE_COMPILE": - lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0", + # Feature flag to enable/disable Inductor standalone compile. + # In torch < 2.8 we ignore this flag; in torch >= 2.8 this is + # enabled by default. + "VLLM_USE_STANDALONE_COMPILE": + lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1", # local rank of the process in the distributed setting, used to determine # the GPU device id @@ -892,7 +894,7 @@ def compute_hash() -> str: "VLLM_USE_TRITON_AWQ", "VLLM_DP_RANK", "VLLM_DP_SIZE", - "VLLM_TEST_STANDALONE_COMPILE", + "VLLM_USE_STANDALONE_COMPILE", ] for key in environment_variables_to_hash: if key in environment_variables: