From a58f24e590c1c4be2d1398f62f119a795e79d833 Mon Sep 17 00:00:00 2001 From: zifeitong Date: Mon, 3 Jun 2024 20:55:50 -0700 Subject: [PATCH] [Bugfix] Fix torch.compile() error when using MultiprocessingGPUExecutor (#5229) --- vllm/executor/multiproc_gpu_executor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm/executor/multiproc_gpu_executor.py b/vllm/executor/multiproc_gpu_executor.py index 8fa54454907b..bd1cac2ab9b5 100644 --- a/vllm/executor/multiproc_gpu_executor.py +++ b/vllm/executor/multiproc_gpu_executor.py @@ -34,6 +34,9 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor): # Ensure that VLLM_INSTANCE_ID is set, to be inherited by workers os.environ["VLLM_INSTANCE_ID"] = get_vllm_instance_id() + # Disable torch async compilation, which won't work with daemonic processes + os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1" + from torch.cuda import device_count assert world_size <= device_count(), ( "please set tensor_parallel_size to less than max local gpu count")