diff --git a/vllm/triton_utils/importing.py b/vllm/triton_utils/importing.py index a003e4eb02c0..21beb76f37cc 100644 --- a/vllm/triton_utils/importing.py +++ b/vllm/triton_utils/importing.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import os import types from importlib.util import find_spec @@ -23,7 +24,22 @@ if HAS_TRITON: x.driver for x in backends.values() if x.driver and x.driver.is_active() ] - if len(active_drivers) != 1: + + # Check if we're in a distributed environment where CUDA_VISIBLE_DEVICES + # might be temporarily empty (e.g., Ray sets it to "" during actor init) + cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES") + is_distributed_env = (cuda_visible_devices is not None + and len(cuda_visible_devices.strip()) == 0) + + # Apply lenient driver check for distributed environments + if is_distributed_env and len(active_drivers) == 0: + # Allow 0 drivers in distributed environments - they may become + # active later when CUDA context is properly initialized + logger.debug( + "Triton found 0 active drivers in distributed environment. " + "This is expected during initialization.") + elif not is_distributed_env and len(active_drivers) != 1: + # Strict check for non-distributed environments logger.info( "Triton is installed but %d active driver(s) found " "(expected 1). Disabling Triton to prevent runtime errors.",