Use standalone_compile by default in torch >= 2.8.0 (#18846)

Signed-off-by: rzou <zou3519@gmail.com>
This commit is contained in:
Richard Zou 2025-05-29 18:41:58 -04:00 committed by GitHub
parent 64eaf5fe05
commit a521ef06e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 12 additions and 9 deletions

View File

@@ -16,7 +16,7 @@ import vllm.envs as envs
from vllm.config import CompilationConfig, VllmConfig from vllm.config import CompilationConfig, VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import resolve_obj_by_qualname from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
from .compiler_interface import (CompilerInterface, EagerAdaptor, from .compiler_interface import (CompilerInterface, EagerAdaptor,
InductorAdaptor, InductorStandaloneAdaptor) InductorAdaptor, InductorStandaloneAdaptor)
@@ -29,7 +29,8 @@ logger = init_logger(__name__)
def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface: def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
if compilation_config.use_inductor: if compilation_config.use_inductor:
if envs.VLLM_TEST_STANDALONE_COMPILE: if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
"2.8.0"):
logger.info("Using InductorStandaloneAdaptor") logger.info("Using InductorStandaloneAdaptor")
return InductorStandaloneAdaptor() return InductorStandaloneAdaptor()
else: else:

View File

@@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
This is not on by default yet, but we plan to turn it on by default for This is not on by default yet, but we plan to turn it on by default for
PyTorch 2.8. PyTorch 2.8.
Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off. Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
""" """
name = "inductor_standalone" name = "inductor_standalone"

View File

@@ -308,9 +308,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: bool( lambda: bool(
os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"), os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),
# Internal flag to enable/disable Inductor standalone compile # Feature flag to enable/disable Inductor standalone compile.
"VLLM_TEST_STANDALONE_COMPILE": # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0", # enabled by default.
"VLLM_USE_STANDALONE_COMPILE":
lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
# local rank of the process in the distributed setting, used to determine # local rank of the process in the distributed setting, used to determine
# the GPU device id # the GPU device id
@@ -892,7 +894,7 @@ def compute_hash() -> str:
"VLLM_USE_TRITON_AWQ", "VLLM_USE_TRITON_AWQ",
"VLLM_DP_RANK", "VLLM_DP_RANK",
"VLLM_DP_SIZE", "VLLM_DP_SIZE",
"VLLM_TEST_STANDALONE_COMPILE", "VLLM_USE_STANDALONE_COMPILE",
] ]
for key in environment_variables_to_hash: for key in environment_variables_to_hash:
if key in environment_variables: if key in environment_variables: