From 7341c77d693edcecf0a9f5a6e399c5137177dfba Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Mon, 18 Mar 2024 23:05:20 -0700
Subject: [PATCH] [BugFix] Avoid initializing CUDA too early (#3487)

---
 vllm/config.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index f792e89095246..51ae66e2375ab 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -577,12 +577,12 @@ class DeviceConfig:
     def __init__(self, device: str = "auto") -> None:
         if device == "auto":
             # Automated device type detection
-            if torch.cuda.is_available():
-                self.device_type = "cuda"
-            elif is_neuron():
+            if is_neuron():
                 self.device_type = "neuron"
             else:
-                raise RuntimeError("No supported device detected.")
+                # We don't call torch.cuda.is_available() here to
+                # avoid initializing CUDA before workers are forked
+                self.device_type = "cuda"
         else:
             # Device type is assigned explicitly
             self.device_type = device
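
For context (not part of the patch): a minimal sketch of the failure mode this change avoids. If the parent process probes the GPU with torch.cuda.is_available() and then forks workers, CUDA work in the forked children can fail because a CUDA context generally cannot be re-initialized after fork. The standalone script below is a hypothetical illustration of that pattern, not code from vLLM.

# repro_fork_cuda.py - hypothetical illustration, assumes a Linux host with a CUDA GPU.
import multiprocessing as mp

import torch


def worker() -> None:
    # If the parent already touched CUDA, creating a GPU tensor in a forked
    # child typically raises a RuntimeError about re-initializing CUDA in a
    # forked subprocess.
    print(torch.zeros(1, device="cuda"))


if __name__ == "__main__":
    # This call can initialize the CUDA context in the parent process,
    # which is what the patch avoids doing during config construction.
    torch.cuda.is_available()

    ctx = mp.get_context("fork")
    p = ctx.Process(target=worker)
    p.start()
    p.join()

After the patch, DeviceConfig simply assumes "cuda" whenever no Neuron device is detected, deferring any CUDA probing until after the workers have been forked.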