diff --git a/tests/utils.py b/tests/utils.py
index a37872830dade..f4317e6bdb406 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -818,14 +818,15 @@ def create_new_process_for_each_test(
 
     Args:
         method: The process creation method. Can be either "spawn" or "fork".
-            If not specified,
-            it defaults to "spawn" on ROCm platforms and "fork" otherwise.
+            If not specified, it defaults to "spawn" on ROCm and XPU
+            platforms and "fork" otherwise.
 
     Returns:
         A decorator to run test functions in separate processes.
     """
     if method is None:
-        method = "spawn" if current_platform.is_rocm() else "fork"
+        use_spawn = current_platform.is_rocm() or current_platform.is_xpu()
+        method = "spawn" if use_spawn else "fork"
 
     assert method in ["spawn",
                       "fork"], "Method must be either 'spawn' or 'fork'"
diff --git a/tests/v1/e2e/test_cascade_attention.py b/tests/v1/e2e/test_cascade_attention.py
index 161bcd4d3ef9d..f2f460513605f 100644
--- a/tests/v1/e2e/test_cascade_attention.py
+++ b/tests/v1/e2e/test_cascade_attention.py
@@ -5,10 +5,10 @@
 import pytest
 
 from vllm import LLM, SamplingParams
 
-from ...utils import fork_new_process_for_each_test
+from ...utils import create_new_process_for_each_test
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 @pytest.mark.parametrize("attn_backend", ["FLASH_ATTN_VLLM_V1", "FLASHINFER_VLLM_V1"])
 def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index bfdbd682464a8..cf7320a19e4d9 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -1535,6 +1535,13 @@ def cuda_is_initialized() -> bool:
     return torch.cuda.is_initialized()
 
 
+def xpu_is_initialized() -> bool:
+    """Check if XPU is initialized."""
+    if not torch.xpu._is_compiled():
+        return False
+    return torch.xpu.is_initialized()
+
+
 def cuda_get_device_properties(device,
                                names: Sequence[str],
                                init_cuda=False) -> tuple[Any, ...]:
@@ -2848,6 +2855,8 @@ def _maybe_force_spawn():
     reason = None
     if cuda_is_initialized():
         reason = "CUDA is initialized"
+    elif xpu_is_initialized():
+        reason = "XPU is initialized"
     elif is_in_ray_actor():
         # even if we choose to spawn, we need to pass the ray address
         # to the subprocess so that it knows how to connect to the ray cluster.
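
Note: the body of create_new_process_for_each_test is outside this diff. As a
rough illustration only (a hypothetical sketch, not the vLLM implementation),
a factory-style decorator that runs each test in a fresh process with a chosen
start method could look like this:

    import multiprocessing
    from functools import wraps
    from typing import Callable, Optional


    def run_test_in_new_process(method: Optional[str] = None) -> Callable:
        # The real helper defaults to "spawn" on ROCm/XPU, "fork" elsewhere.
        method = method or "fork"

        def decorator(fn: Callable) -> Callable:

            @wraps(fn)
            def wrapper(*args, **kwargs):
                # A fresh process per test keeps device/runtime state from
                # leaking between tests. Under "spawn", fn and its arguments
                # must be picklable.
                ctx = multiprocessing.get_context(method)
                proc = ctx.Process(target=fn, args=args, kwargs=kwargs)
                proc.start()
                proc.join()
                assert proc.exitcode == 0, (
                    f"test subprocess exited with code {proc.exitcode}")

            return wrapper

        return decorator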
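Since the new helper is a factory (the old fork_new_process_for_each_test was
applied bare), call sites gain parentheses and, per the docstring above, can
also pass a method explicitly:

    @create_new_process_for_each_test()         # platform-dependent default
    def test_with_default_method():
        ...


    @create_new_process_for_each_test("spawn")  # force spawn on any platform
    def test_with_forced_spawn():
        ...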
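On the vllm/utils/__init__.py change: forking a process that has already
initialized a GPU runtime is unsafe (the child inherits unusable driver
state), which is why _maybe_force_spawn switches to "spawn" once CUDA, and now
XPU, is live. The torch.xpu._is_compiled() guard keeps the check safe on torch
builds shipped without XPU support. A condensed illustration of the updated
check (the Ray-actor branch and the code that acts on the reason are elided;
helper names are as patched above):

    from typing import Optional

    from vllm.utils import cuda_is_initialized, xpu_is_initialized


    def forced_spawn_reason() -> Optional[str]:
        # Mirrors the if/elif chain in _maybe_force_spawn.
        if cuda_is_initialized():
            return "CUDA is initialized"
        if xpu_is_initialized():
            return "XPU is initialized"
        return None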