From 55c21c88363811feb2aeb5aecac3cd48683e4705 Mon Sep 17 00:00:00 2001 From: Micah Williamson Date: Sat, 22 Nov 2025 23:05:00 -0600 Subject: [PATCH] [ROCm][CI] Fix "Cannot re-initialize CUDA in forked subprocess" in test_pynccl.py (#29119) Signed-off-by: Micah Williamson --- requirements/rocm-test.txt | 3 +++ tests/distributed/test_pynccl.py | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index eabb5065bfce..2d57e7e16786 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -40,5 +40,8 @@ mteb[bm25s]>=1.38.11, <2 # Required for eval tests lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d +# Required for multiprocessed tests that use spawn method +multiprocess==0.70.16 + # Plugins test terratorch @ git+https://github.com/IBM/terratorch.git@07184fcf91a1324f831ff521dd238d97fe350e3e diff --git a/tests/distributed/test_pynccl.py b/tests/distributed/test_pynccl.py index c3085beeb356..c7c9d0602def 100644 --- a/tests/distributed/test_pynccl.py +++ b/tests/distributed/test_pynccl.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import multiprocessing import os +import multiprocess as mp import numpy as np import pytest import torch @@ -20,10 +20,12 @@ from vllm.distributed.parallel_state import ( ) from vllm.utils.system_utils import update_environment_variables +mp.set_start_method("spawn", force=True) + def distributed_run(fn, world_size): number_of_processes = world_size - processes: list[multiprocessing.Process] = [] + processes: list[mp.Process] = [] for i in range(number_of_processes): env: dict[str, str] = {} env["RANK"] = str(i) @@ -32,7 +34,7 @@ def distributed_run(fn, world_size): env["LOCAL_WORLD_SIZE"] = str(number_of_processes) env["MASTER_ADDR"] = "localhost" env["MASTER_PORT"] = "12345" - p = multiprocessing.Process(target=fn, args=(env,)) + p = mp.Process(target=fn, args=(env,)) processes.append(p) p.start()