From d67cc21b787dc594f6b168c4e44a0c6ecb415385 Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Mon, 17 Feb 2025 02:55:27 +0800
Subject: [PATCH] [Bugfix][Platform][CPU] Fix cuda platform detection on CPU
 backend edge case (#13358)

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 vllm/platforms/__init__.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index e4767a378f45f..724c4357ff740 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -33,12 +33,19 @@ def cuda_platform_plugin() -> Optional[str]:
     is_cuda = False
 
     try:
+        from importlib.metadata import version
+
         from vllm.utils import import_pynvml
         pynvml = import_pynvml()
         pynvml.nvmlInit()
         try:
-            if pynvml.nvmlDeviceGetCount() > 0:
-                is_cuda = True
+            # NOTE: Edge case: vllm cpu build on a GPU machine.
+            # Third-party pynvml can still be imported in a cpu build,
+            # so we also need to check whether vllm itself is a cpu
+            # build. Otherwise, vllm would always activate the cuda
+            # plugin on a GPU machine, even in a cpu build.
+            is_cuda = (pynvml.nvmlDeviceGetCount() > 0
+                       and "cpu" not in version("vllm"))
         finally:
             pynvml.nvmlShutdown()
     except Exception as e: