diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index 5af3b7efed2d6..90795c29ebab7 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -31,7 +31,7 @@ logger = init_logger(__name__)
 def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
     if compilation_config.use_inductor:
         if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
-                "2.8.0"):
+                "2.8.0a"):
             logger.debug("Using InductorStandaloneAdaptor")
             return InductorStandaloneAdaptor()
         else:
diff --git a/vllm/model_executor/layers/quantization/torchao.py b/vllm/model_executor/layers/quantization/torchao.py
index af50b45d44b7f..9c909a3a430cb 100644
--- a/vllm/model_executor/layers/quantization/torchao.py
+++ b/vllm/model_executor/layers/quantization/torchao.py
@@ -44,14 +44,14 @@ class TorchAOConfig(QuantizationConfig):
         """
         # TorchAO quantization relies on tensor subclasses. In order,
         # to enable proper caching this needs standalone compile
-        if is_torch_equal_or_newer("2.8.0"):
+        if is_torch_equal_or_newer("2.8.0a"):
             os.environ["VLLM_TEST_STANDALONE_COMPILE"] = "1"
             logger.info(
                 "Using TorchAO: Setting VLLM_TEST_STANDALONE_COMPILE=1")
 
         # TODO: remove after the torch dependency is updated to 2.8
         if is_torch_equal_or_newer(
-                "2.7.0") and not is_torch_equal_or_newer("2.8.0"):
+                "2.7.0") and not is_torch_equal_or_newer("2.8.0a"):
             os.environ["VLLM_DISABLE_COMPILE_CACHE"] = "1"
             logger.info("Using TorchAO: Setting VLLM_DISABLE_COMPILE_CACHE=1")
         """
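
Note: under PEP 440 ordering, a final-release target of "2.8.0" rejects pre-release torch builds such as the "2.8.0a0+<hash>" versions that from-source or container builds report. Relaxing the target to "2.8.0a" accepts those alpha builds while still matching the final 2.8.0 release. A minimal sketch of the ordering (assuming is_torch_equal_or_newer compares PEP 440 versions, e.g. via packaging.version; the local tag "+gitabc123" is made up for illustration):

    from packaging.version import Version

    prerelease = Version("2.8.0a0+gitabc123")  # e.g. a from-source torch build
    final = Version("2.8.0")

    # Alpha pre-releases sort *before* the final release, so a
    # ">= 2.8.0" check rejects them:
    assert prerelease < Version("2.8.0")

    # "2.8.0a" normalizes to the first alpha (2.8.0a0), which both the
    # pre-release builds and the final release compare at-or-above:
    assert prerelease >= Version("2.8.0a")
    assert final >= Version("2.8.0a")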