[XPU] Use spawn with XPU multiprocessing (#20649)

Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
2026-05-27 07:34:25 +08:00 · 2025-07-09 00:34:28 -07:00 · 2025-07-09 00:34:28 -07:00 · e760fcef22
commit e760fcef22
parent 6bbf1795b7
3 changed files with 15 additions and 5 deletions
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -818,14 +818,15 @@ def create_new_process_for_each_test(
    Args:
        method: The process creation method. Can be either "spawn" or "fork". 
-               If not specified,
+               If not specified, it defaults to "spawn" on ROCm and XPU
-               it defaults to "spawn" on ROCm platforms and "fork" otherwise.
+               platforms and "fork" otherwise.
    Returns:
        A decorator to run test functions in separate processes.
    """
    if method is None:
-        method = "spawn" if current_platform.is_rocm() else "fork"
+        use_spawn = current_platform.is_rocm() or current_platform.is_xpu()
+        method = "spawn" if use_spawn else "fork"
    assert method in ["spawn",
                      "fork"], "Method must be either 'spawn' or 'fork'"
--- a/tests/v1/e2e/test_cascade_attention.py
+++ b/tests/v1/e2e/test_cascade_attention.py
@@ -5,10 +5,10 @@ import pytest
 from vllm import LLM, SamplingParams
-from ...utils import fork_new_process_for_each_test
+from ...utils import create_new_process_for_each_test
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
@pytest.mark.parametrize("attn_backend",
                         ["FLASH_ATTN_VLLM_V1", "FLASHINFER_VLLM_V1"])
 def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -1535,6 +1535,13 @@ def cuda_is_initialized() -> bool:
    return torch.cuda.is_initialized()
+def xpu_is_initialized() -> bool:
+    """Check if XPU is initialized."""
+    if not torch.xpu._is_compiled():
+        return False
+    return torch.xpu.is_initialized()
 def cuda_get_device_properties(device,
                               names: Sequence[str],
                               init_cuda=False) -> tuple[Any, ...]:
@@ -2848,6 +2855,8 @@ def _maybe_force_spawn():
    reason = None
    if cuda_is_initialized():
        reason = "CUDA is initialized"
+    elif xpu_is_initialized():
+        reason = "XPU is initialized"
    elif is_in_ray_actor():
        # even if we choose to spawn, we need to pass the ray address
        # to the subprocess so that it knows how to connect to the ray cluster.