[XPU] Use spawn with XPU multiprocessing (#20649)

Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Dmitry Rogozhkin 2025-07-09 00:34:28 -07:00 committed by GitHub
parent 6bbf1795b7
commit e760fcef22
3 changed files with 15 additions and 5 deletions


@@ -818,14 +818,15 @@ def create_new_process_for_each_test(
     Args:
         method: The process creation method. Can be either "spawn" or "fork".
-                If not specified,
-                it defaults to "spawn" on ROCm platforms and "fork" otherwise.
+                If not specified, it defaults to "spawn" on ROCm and XPU
+                platforms and "fork" otherwise.
 
     Returns:
         A decorator to run test functions in separate processes.
     """
     if method is None:
-        method = "spawn" if current_platform.is_rocm() else "fork"
+        use_spawn = current_platform.is_rocm() or current_platform.is_xpu()
+        method = "spawn" if use_spawn else "fork"
 
     assert method in ["spawn",
                       "fork"], "Method must be either 'spawn' or 'fork'"


@@ -5,10 +5,10 @@ import pytest
 
 from vllm import LLM, SamplingParams
 
-from ...utils import fork_new_process_for_each_test
+from ...utils import create_new_process_for_each_test
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 @pytest.mark.parametrize("attn_backend",
                          ["FLASH_ATTN_VLLM_V1", "FLASHINFER_VLLM_V1"])
 def test_cascade_attention(example_system_message, monkeypatch, attn_backend):


@@ -1535,6 +1535,13 @@ def cuda_is_initialized() -> bool:
     return torch.cuda.is_initialized()
 
 
+def xpu_is_initialized() -> bool:
+    """Check if XPU is initialized."""
+    if not torch.xpu._is_compiled():
+        return False
+    return torch.xpu.is_initialized()
+
+
 def cuda_get_device_properties(device,
                                names: Sequence[str],
                                init_cuda=False) -> tuple[Any, ...]:
@@ -2848,6 +2855,8 @@ def _maybe_force_spawn():
     reason = None
     if cuda_is_initialized():
         reason = "CUDA is initialized"
+    elif xpu_is_initialized():
+        reason = "XPU is initialized"
     elif is_in_ray_actor():
         # even if we choose to spawn, we need to pass the ray address
         # to the subprocess so that it knows how to connect to the ray cluster.
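
Taken together, a minimal standalone sketch (not from the patch; it assumes a PyTorch build with the torch.xpu backend, and the worker body is illustrative) of why "spawn" is forced once an accelerator runtime is live in the parent process:

# Minimal sketch: forking after the CUDA/XPU runtime has been initialized
# can leave the child with a broken device context, so workers are started
# with "spawn" instead.
import multiprocessing as mp

import torch


def worker(rank: int) -> None:
    # A spawned child re-imports modules and re-initializes the device
    # runtime from scratch instead of inheriting the parent's state.
    has_xpu = hasattr(torch, "xpu") and torch.xpu.is_available()
    print(f"worker {rank} sees xpu={has_xpu}")


if __name__ == "__main__":
    # Touch the device in the parent, mirroring the "XPU is initialized"
    # condition that _maybe_force_spawn() checks above.
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        torch.empty(1, device="xpu")

    ctx = mp.get_context("spawn")  # "fork" would inherit the live runtime
    procs = [ctx.Process(target=worker, args=(i,)) for i in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()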