diff --git a/vllm/config.py b/vllm/config.py
index d649eb75033f..6623a48f839a 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2137,10 +2137,11 @@ class ParallelConfig:
             elif (current_platform.is_cuda()
                   and cuda_device_count_stateless() < self.world_size):
                 if not ray_found:
-                    raise ValueError("Unable to load Ray which is "
+                    raise ValueError("Unable to load Ray: "
+                                     f"{ray_utils.ray_import_err}. Ray is "
                                      "required for multi-node inference, "
                                      "please install Ray with `pip install "
-                                     "ray`.") from ray_utils.ray_import_err
+                                     "ray`.")
                 backend = "ray"
             elif self.data_parallel_backend == "ray":
                 logger.info("Using ray distributed inference because "
diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py
index c222f1609096..033ecc00853b 100644
--- a/vllm/executor/ray_utils.py
+++ b/vllm/executor/ray_utils.py
@@ -145,7 +145,9 @@ try:
 
 except ImportError as e:
     ray = None  # type: ignore
-    ray_import_err = e
+    # only capture string to avoid variable references in the traceback that can
+    # prevent garbage collection in some cases
+    ray_import_err = str(e)
     RayWorkerWrapper = None  # type: ignore
 
 
@@ -157,8 +159,8 @@ def ray_is_available() -> bool:
 def assert_ray_available():
     """Raise an exception if Ray is not available."""
     if ray is None:
-        raise ValueError("Failed to import Ray, please install Ray with "
-                         "`pip install ray`.") from ray_import_err
+        raise ValueError(f"Failed to import Ray: {ray_import_err}. "
+                         "Please install Ray with `pip install ray`.")
 
 
 def _verify_bundles(placement_group: "PlacementGroup",