diff --git a/tests/compile/fullgraph/test_full_graph.py b/tests/compile/fullgraph/test_full_graph.py
index ffec14d36fd17..c5baa66cbeb07 100644
--- a/tests/compile/fullgraph/test_full_graph.py
+++ b/tests/compile/fullgraph/test_full_graph.py
@@ -13,9 +13,7 @@
 from vllm import LLM, SamplingParams
 from vllm.attention.backends.registry import AttentionBackendEnum
 from vllm.config import CompilationConfig, CompilationMode, CUDAGraphMode, PassConfig
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import (
-    is_torch_equal_or_newer,
-)
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 from ...utils import create_new_process_for_each_test
diff --git a/vllm/env_override.py b/vllm/env_override.py
index e091867fd0ca8..474ac69919eb1 100644
--- a/vllm/env_override.py
+++ b/vllm/env_override.py
@@ -377,9 +377,12 @@ def _patch_get_raw_stream_if_needed():
     if is_torch_equal("2.9.0") or is_torch_equal("2.9.1"):
         import builtins
 
-        from torch._C import _cuda_getCurrentRawStream as _get_raw_stream
+        # Check if CUDA functionality is available without initializing CUDA
+        # _cuda_getCurrentRawStream only exists in CUDA builds of PyTorch
+        if hasattr(torch._C, "_cuda_getCurrentRawStream"):
+            from torch._C import _cuda_getCurrentRawStream as _get_raw_stream
 
-        builtins.get_raw_stream = _get_raw_stream
+            builtins.get_raw_stream = _get_raw_stream
 
 
 _patch_get_raw_stream_if_needed()