From d67cc21b787dc594f6b168c4e44a0c6ecb415385 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Mon, 17 Feb 2025 02:55:27 +0800 Subject: [PATCH] [Bugfix][Platform][CPU] Fix cuda platform detection on CPU backend edge case (#13358) Signed-off-by: Isotr0py <2037008807@qq.com> --- vllm/platforms/__init__.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py index e4767a378f45f..724c4357ff740 100644 --- a/vllm/platforms/__init__.py +++ b/vllm/platforms/__init__.py @@ -33,12 +33,19 @@ def cuda_platform_plugin() -> Optional[str]: is_cuda = False try: + from importlib.metadata import version + from vllm.utils import import_pynvml pynvml = import_pynvml() pynvml.nvmlInit() try: - if pynvml.nvmlDeviceGetCount() > 0: - is_cuda = True + # NOTE: Edge case: vllm cpu build on a GPU machine. + # Third-party pynvml can be imported in cpu build, + # we need to check if vllm is built with cpu too. + # Otherwise, vllm will always activate cuda plugin + # on a GPU machine, even if in a cpu build. + is_cuda = (pynvml.nvmlDeviceGetCount() > 0 + and "cpu" not in version("vllm")) finally: pynvml.nvmlShutdown() except Exception as e: