From afb1e5b380ff623e478d19a246b42b2903b9331f Mon Sep 17 00:00:00 2001
From: Divakar Verma <137818590+divakar-amd@users.noreply.github.com>
Date: Tue, 2 Dec 2025 14:46:10 -0600
Subject: [PATCH] [CI][ROCm][tests/v1/e2e] Fix multiprocessing launch for the test (#29123)

Signed-off-by: Divakar Verma
---
 tests/v1/e2e/test_kv_sharing_fast_prefill.py | 22 +++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/tests/v1/e2e/test_kv_sharing_fast_prefill.py b/tests/v1/e2e/test_kv_sharing_fast_prefill.py
index 2778b0c5e5670..f895fb72e94a1 100644
--- a/tests/v1/e2e/test_kv_sharing_fast_prefill.py
+++ b/tests/v1/e2e/test_kv_sharing_fast_prefill.py
@@ -7,6 +7,7 @@
 import pytest
 
 from vllm import LLM, SamplingParams
 from vllm.config import CompilationConfig, CompilationMode
+from vllm.platforms import current_platform
 
 from ...utils import check_answers, fork_new_process_for_each_test, prep_prompts
@@ -43,15 +44,26 @@ def test_prompts():
     return prompts
 
 
-@fork_new_process_for_each_test
+use_fork_for_test = (
+    fork_new_process_for_each_test if not current_platform.is_rocm() else lambda x: x
+)
+
+
+@use_fork_for_test
 @pytest.mark.parametrize("kv_sharing_fast_prefill", [False, True])
 @pytest.mark.parametrize("enforce_eager", [True, False])
 def test_kv_sharing_fast_prefill(
     monkeypatch: pytest.MonkeyPatch,
     kv_sharing_fast_prefill: bool,
     enforce_eager: bool,
-    test_prompts: list[str],
 ):
+    if not enforce_eager and current_platform.is_rocm():
+        # Relevant context: https://github.com/vllm-project/vllm/pull/29244
+        pytest.skip(
+            "ROCm: torch.compile produces incorrect output for gemma-3n's GELU "
+            "with tanh approximation. Use enforce_eager=True instead."
+        )
+
     sampling_params = SamplingParams(temperature=0.0, max_tokens=100)
     compilation_config = CompilationConfig(
         # This allows vLLM compilation backend to handle allocating and
@@ -65,7 +77,11 @@ def test_kv_sharing_fast_prefill(
     with monkeypatch.context() as m:
         # Make scheduling deterministic for reproducibility
-        m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
+        if current_platform.is_rocm():
+            # Use spawn to prevent cuda re-initialization error
+            m.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
+        else:
+            m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
 
         prompts, answer, indices = prep_prompts(batch_size)
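
# ---------------------------------------------------------------------------
# Illustration (not part of the patch above): a minimal, self-contained sketch
# of the conditional-decorator pattern the patch introduces. The idea is to
# apply the process-forking test wrapper only on platforms where fork is safe,
# and fall back to an identity decorator on ROCm, where the patch instead
# relies on spawn to avoid the device re-initialization error mentioned in the
# diff. `is_rocm()` and `fork_new_process_for_each_test` below are stand-ins
# for the vLLM helpers referenced in the diff, not the real implementations.
# ---------------------------------------------------------------------------
import os
from collections.abc import Callable
from functools import wraps


def is_rocm() -> bool:
    # Stand-in platform check; vLLM uses current_platform.is_rocm().
    return os.environ.get("SIMULATE_ROCM", "0") == "1"


def fork_new_process_for_each_test(fn: Callable) -> Callable:
    # Stand-in for the real test helper: here it only tags the wrapped test.
    @wraps(fn)
    def wrapper(*args, **kwargs):
        return fn(*args, **kwargs)

    wrapper.forked = True
    return wrapper


# Same shape as the patch: forking decorator off ROCm, identity decorator on ROCm.
use_fork_for_test = fork_new_process_for_each_test if not is_rocm() else (lambda fn: fn)


@use_fork_for_test
def test_something():
    assert True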