# Commentary (text before the first "diff --git" header is not part of the patch).
#
# Scope: one hunk in vllm/v1/worker/cpu_model_runner.py, inside the
# @contextmanager function _set_global_compilation_settings(config).
#
# Change: the function-local "import torch._inductor.config" becomes
# "import torch._inductor.config as torch_inductor_config", and the three
# accesses to the "freezing" attribute (read, set to True, restore) go
# through that alias instead of the dotted path torch._inductor.config.
#
# Logic is unchanged: the current "freezing" value is saved, set to True only
# when inductor_compile_config.get("max_autotune", False) is truthy, control
# is yielded to the caller, and the saved value is written back in "finally".
#
# Side note: a plain "import torch._inductor.config" binds the name "torch"
# as a function local, while the aliased form binds only
# "torch_inductor_config". The patch does not state its motivation;
# presumably it is a lint/type-check or name-binding cleanup - TODO confirm
# with the author.
#
# NOTE(review): the two blank context lines below (before "@contextmanager"
# and after the import) are reconstructed from the 14-line hunk counts -
# verify against the target file before applying.
diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py
index 360a626979e54..feb49978d7518 100644
--- a/vllm/v1/worker/cpu_model_runner.py
+++ b/vllm/v1/worker/cpu_model_runner.py
@@ -138,14 +138,14 @@ def _torch_cuda_wrapper():
 
 @contextmanager
 def _set_global_compilation_settings(config: VllmConfig):
-    import torch._inductor.config
+    import torch._inductor.config as torch_inductor_config
 
     inductor_config = config.compilation_config.inductor_compile_config
     # Note: The MKLDNN and CPPGEMM backend requires freezing parameters.
-    freezing_value = torch._inductor.config.freezing
+    freezing_value = torch_inductor_config.freezing
     try:
         if inductor_config.get("max_autotune", False):
-            torch._inductor.config.freezing = True
+            torch_inductor_config.freezing = True
         yield
     finally:
-        torch._inductor.config.freezing = freezing_value
+        torch_inductor_config.freezing = freezing_value