mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-27 07:34:25 +08:00
[XPU] Use spawn with XPU multiprocessing (#20649)
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
This commit is contained in:
parent
6bbf1795b7
commit
e760fcef22
@@ -818,14 +818,15 @@ def create_new_process_for_each_test(
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
method: The process creation method. Can be either "spawn" or "fork".
|
method: The process creation method. Can be either "spawn" or "fork".
|
||||||
If not specified,
|
If not specified, it defaults to "spawn" on ROCm and XPU
|
||||||
it defaults to "spawn" on ROCm platforms and "fork" otherwise.
|
platforms and "fork" otherwise.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A decorator to run test functions in separate processes.
|
A decorator to run test functions in separate processes.
|
||||||
"""
|
"""
|
||||||
if method is None:
|
if method is None:
|
||||||
method = "spawn" if current_platform.is_rocm() else "fork"
|
use_spawn = current_platform.is_rocm() or current_platform.is_xpu()
|
||||||
|
method = "spawn" if use_spawn else "fork"
|
||||||
|
|
||||||
assert method in ["spawn",
|
assert method in ["spawn",
|
||||||
"fork"], "Method must be either 'spawn' or 'fork'"
|
"fork"], "Method must be either 'spawn' or 'fork'"
|
||||||
|
|||||||
@@ -5,10 +5,10 @@ import pytest
|
|||||||
|
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
from ...utils import fork_new_process_for_each_test
|
from ...utils import create_new_process_for_each_test
|
||||||
|
|
||||||
|
|
||||||
@fork_new_process_for_each_test
|
@create_new_process_for_each_test()
|
||||||
@pytest.mark.parametrize("attn_backend",
|
@pytest.mark.parametrize("attn_backend",
|
||||||
["FLASH_ATTN_VLLM_V1", "FLASHINFER_VLLM_V1"])
|
["FLASH_ATTN_VLLM_V1", "FLASHINFER_VLLM_V1"])
|
||||||
def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
|
def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
|
||||||
|
|||||||
@@ -1535,6 +1535,13 @@ def cuda_is_initialized() -> bool:
|
|||||||
return torch.cuda.is_initialized()
|
return torch.cuda.is_initialized()
|
||||||
|
|
||||||
|
|
||||||
|
def xpu_is_initialized() -> bool:
|
||||||
|
"""Check if XPU is initialized."""
|
||||||
|
if not torch.xpu._is_compiled():
|
||||||
|
return False
|
||||||
|
return torch.xpu.is_initialized()
|
||||||
|
|
||||||
|
|
||||||
def cuda_get_device_properties(device,
|
def cuda_get_device_properties(device,
|
||||||
names: Sequence[str],
|
names: Sequence[str],
|
||||||
init_cuda=False) -> tuple[Any, ...]:
|
init_cuda=False) -> tuple[Any, ...]:
|
||||||
@@ -2848,6 +2855,8 @@ def _maybe_force_spawn():
|
|||||||
reason = None
|
reason = None
|
||||||
if cuda_is_initialized():
|
if cuda_is_initialized():
|
||||||
reason = "CUDA is initialized"
|
reason = "CUDA is initialized"
|
||||||
|
elif xpu_is_initialized():
|
||||||
|
reason = "XPU is initialized"
|
||||||
elif is_in_ray_actor():
|
elif is_in_ray_actor():
|
||||||
# even if we choose to spawn, we need to pass the ray address
|
# even if we choose to spawn, we need to pass the ray address
|
||||||
# to the subprocess so that it knows how to connect to the ray cluster.
|
# to the subprocess so that it knows how to connect to the ray cluster.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user