From a58f24e590c1c4be2d1398f62f119a795e79d833 Mon Sep 17 00:00:00 2001 From: zifeitong Date: Mon, 3 Jun 2024 20:55:50 -0700 Subject: [PATCH] [Bugfix] Fix torch.compile() error when using MultiprocessingGPUExecutor (#5229) --- vllm/executor/multiproc_gpu_executor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm/executor/multiproc_gpu_executor.py b/vllm/executor/multiproc_gpu_executor.py index 8fa54454907b..bd1cac2ab9b5 100644 --- a/vllm/executor/multiproc_gpu_executor.py +++ b/vllm/executor/multiproc_gpu_executor.py @@ -34,6 +34,9 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor): # Ensure that VLLM_INSTANCE_ID is set, to be inherited by workers os.environ["VLLM_INSTANCE_ID"] = get_vllm_instance_id() + # Disable torch async compilation, which won't work with daemonic processes + os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1" + from torch.cuda import device_count assert world_size <= device_count(), ( "please set tensor_parallel_size to less than max local gpu count")