[platform] add debug logging while inferring the device type (#14195)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2025-12-21 06:55:01 +08:00 · 2025-03-04 18:39:16 +08:00 · 2025-03-04 18:39:16 +08:00 · ac65bc92df
commit ac65bc92df
parent f78c0be80a
1 changed file with 55 additions and 9 deletions
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -32,6 +32,7 @@ def vllm_version_matches_substr(substr: str) -> bool:

 def tpu_platform_plugin() -> Optional[str]:
    is_tpu = False
+    logger.debug("Checking if TPU platform is available.")
    try:
        # While it's technically possible to install libtpu on a
        # non-TPU machine, this is a very uncommon scenario. Therefore,
@@ -39,7 +40,9 @@ def tpu_platform_plugin() -> Optional[str]:
        # has TPUs.
        import libtpu  # noqa: F401
        is_tpu = True
-    except Exception:
+        logger.debug("Confirmed TPU platform is available.")
+    except Exception as e:
+        logger.debug("TPU platform is not available because: %s", str(e))
        pass

    return "vllm.platforms.tpu.TpuPlatform" if is_tpu else None
@@ -47,7 +50,7 @@ def tpu_platform_plugin() -> Optional[str]:

 def cuda_platform_plugin() -> Optional[str]:
    is_cuda = False
-
+    logger.debug("Checking if CUDA platform is available.")
    try:
        from vllm.utils import import_pynvml
        pynvml = import_pynvml()
@@ -60,9 +63,19 @@ def cuda_platform_plugin() -> Optional[str]:
            # on a GPU machine, even if in a cpu build.
            is_cuda = (pynvml.nvmlDeviceGetCount() > 0
                       and not vllm_version_matches_substr("cpu"))
+            if pynvml.nvmlDeviceGetCount() <= 0:
+                logger.debug(
+                    "CUDA platform is not available because no GPU is found.")
+            if vllm_version_matches_substr("cpu"):
+                logger.debug("CUDA platform is not available because"
+                             " vLLM is built with CPU.")
+            if is_cuda:
+                logger.debug("Confirmed CUDA platform is available.")
        finally:
            pynvml.nvmlShutdown()
    except Exception as e:
+        logger.debug("Exception happens when checking CUDA platform: %s",
+                     str(e))
        if "nvml" not in e.__class__.__name__.lower():
            # If the error is not related to NVML, re-raise it.
            raise e
@@ -75,23 +88,28 @@ def cuda_platform_plugin() -> Optional[str]:
                or os.path.exists("/sys/class/tegra-firmware")

        if cuda_is_jetson():
+            logger.debug("Confirmed CUDA platform is available on Jetson.")
            is_cuda = True
+        else:
+            logger.debug("CUDA platform is not available because: %s", str(e))

    return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None


 def rocm_platform_plugin() -> Optional[str]:
    is_rocm = False
-
+    logger.debug("Checking if ROCm platform is available.")
    try:
        import amdsmi
        amdsmi.amdsmi_init()
        try:
            if len(amdsmi.amdsmi_get_processor_handles()) > 0:
                is_rocm = True
+                logger.debug("Confirmed ROCm platform is available.")
        finally:
            amdsmi.amdsmi_shut_down()
-    except Exception:
+    except Exception as e:
+        logger.debug("ROCm platform is not available because: %s", str(e))
        pass

    return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None
@@ -99,10 +117,17 @@ def rocm_platform_plugin() -> Optional[str]:

 def hpu_platform_plugin() -> Optional[str]:
    is_hpu = False
+    logger.debug("Checking if HPU platform is available.")
    try:
        from importlib import util
        is_hpu = util.find_spec('habana_frameworks') is not None
-    except Exception:
+        if is_hpu:
+            logger.debug("Confirmed HPU platform is available.")
+        else:
+            logger.debug("HPU platform is not available because "
+                         "habana_frameworks is not found.")
+    except Exception as e:
+        logger.debug("HPU platform is not available because: %s", str(e))
        pass

    return "vllm.platforms.hpu.HpuPlatform" if is_hpu else None
@@ -110,7 +135,7 @@ def hpu_platform_plugin() -> Optional[str]:

 def xpu_platform_plugin() -> Optional[str]:
    is_xpu = False
-
+    logger.debug("Checking if XPU platform is available.")
    try:
        # installed IPEX if the machine has XPUs.
        import intel_extension_for_pytorch  # noqa: F401
@@ -118,7 +143,9 @@ def xpu_platform_plugin() -> Optional[str]:
        import torch
        if hasattr(torch, 'xpu') and torch.xpu.is_available():
            is_xpu = True
-    except Exception:
+            logger.debug("Confirmed XPU platform is available.")
+    except Exception as e:
+        logger.debug("XPU platform is not available because: %s", str(e))
        pass

    return "vllm.platforms.xpu.XPUPlatform" if is_xpu else None
@@ -126,13 +153,21 @@ def xpu_platform_plugin() -> Optional[str]:

 def cpu_platform_plugin() -> Optional[str]:
    is_cpu = False
+    logger.debug("Checking if CPU platform is available.")
    try:
        is_cpu = vllm_version_matches_substr("cpu")
+        if is_cpu:
+            logger.debug("Confirmed CPU platform is available because"
+                         " vLLM is built with CPU.")
        if not is_cpu:
            import platform
            is_cpu = platform.machine().lower().startswith("arm")
+            if is_cpu:
+                logger.debug("Confirmed CPU platform is available"
+                             " because the machine is ARM.")

-    except Exception:
+    except Exception as e:
+        logger.debug("CPU platform is not available because: %s", str(e))
        pass

    return "vllm.platforms.cpu.CpuPlatform" if is_cpu else None
@@ -140,10 +175,14 @@ def cpu_platform_plugin() -> Optional[str]:

 def neuron_platform_plugin() -> Optional[str]:
    is_neuron = False
+    logger.debug("Checking if Neuron platform is available.")
    try:
        import transformers_neuronx  # noqa: F401
        is_neuron = True
-    except ImportError:
+        logger.debug("Confirmed Neuron platform is available because"
+                     " transformers_neuronx is found.")
+    except ImportError as e:
+        logger.debug("Neuron platform is not available because: %s", str(e))
        pass

    return "vllm.platforms.neuron.NeuronPlatform" if is_neuron else None
@@ -151,8 +190,15 @@ def neuron_platform_plugin() -> Optional[str]:

 def openvino_platform_plugin() -> Optional[str]:
    is_openvino = False
+    logger.debug("Checking if OpenVINO platform is available.")
    with suppress(Exception):
        is_openvino = vllm_version_matches_substr("openvino")
+        if is_openvino:
+            logger.debug("Confirmed OpenVINO platform is available"
+                         " because vLLM is built with OpenVINO.")
+    if not is_openvino:
+        logger.debug("OpenVINO platform is not available because"
+                     " vLLM is not built with OpenVINO.")

    return "vllm.platforms.openvino.OpenVinoPlatform" if is_openvino else None