diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py
index 226d7792a42f7..360a626979e54 100644
--- a/vllm/v1/worker/cpu_model_runner.py
+++ b/vllm/v1/worker/cpu_model_runner.py
@@ -128,8 +128,8 @@ def _torch_cuda_wrapper():
         self.record = lambda: None
         self.synchronize = lambda: None
 
+    cuda_event = torch.cuda.Event
     try:
-        cuda_event = torch.cuda.Event
         torch.cuda.Event = _EventPlaceholder
         yield
     finally:
@@ -141,9 +141,9 @@ def _set_global_compilation_settings(config: VllmConfig):
     import torch._inductor.config
 
     inductor_config = config.compilation_config.inductor_compile_config
+    # Note: The MKLDNN and CPPGEMM backend requires freezing parameters.
+    freezing_value = torch._inductor.config.freezing
     try:
-        # Note: The MKLDNN and CPPGEMM backend requires freezing parameters.
-        freezing_value = torch._inductor.config.freezing
         if inductor_config.get("max_autotune", False):
             torch._inductor.config.freezing = True
         yield