From fc168c33f35e0610d41206e864b6bf90fe613f19 Mon Sep 17 00:00:00 2001 From: Zhewen Li Date: Fri, 24 Oct 2025 12:26:00 -0700 Subject: [PATCH] [CI/Build] Fix test_torch_utils in AMD CI (#27317) Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 2 +- tests/utils_/test_torch_utils.py | 44 ++++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 17fe60356e21..92e27f143d8d 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -50,7 +50,7 @@ steps: - label: Async Engine, Inputs, Utils, Worker Test # 36min timeout_in_minutes: 50 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking source_file_dependencies: diff --git a/tests/utils_/test_torch_utils.py b/tests/utils_/test_torch_utils.py index 4a966276661c..0a30b9727f4d 100644 --- a/tests/utils_/test_torch_utils.py +++ b/tests/utils_/test_torch_utils.py @@ -60,15 +60,10 @@ def test_common_broadcastable_dtype(dtypes, expected_result): assert common_broadcastable_dtype(dtypes) == expected_result -def test_current_stream_multithread(): +def _test_stream_thread(main_expected_stream: torch.cuda.Stream): import threading - if not torch.cuda.is_available(): - pytest.skip("CUDA not available") - - main_default_stream = torch.cuda.current_stream() child_stream = torch.cuda.Stream() - thread_stream_ready = threading.Event() thread_can_exit = threading.Event() @@ -90,15 +85,44 @@ def test_current_stream_multithread(): assert main_current_stream != child_stream, ( "Main thread's current_stream was contaminated by child thread" ) - assert main_current_stream == main_default_stream, ( - "Main thread's current_stream is not the default stream" + assert main_current_stream == main_expected_stream, ( + f"Main thread's stream changed unexpectedly. " + f"Expected {main_expected_stream}, got {main_current_stream}" ) - # Notify child thread it can exit thread_can_exit.set() finally: - # Ensure child thread exits properly child_thread.join(timeout=5) if child_thread.is_alive(): pytest.fail("Child thread failed to exit properly") + + +def test_current_stream_multithread(): + from vllm.platforms import current_platform + + if not torch.cuda.is_available(): + pytest.skip("CUDA not available") + + if current_platform.is_rocm(): + main_dedicated_stream = current_stream() + + assert main_dedicated_stream.cuda_stream != 0, ( + "ROCm should create a dedicated stream, not use default stream (0x0)" + ) + + main_stream_again = current_stream() + assert main_stream_again == main_dedicated_stream, ( + "Multiple calls to current_stream should return the same dedicated stream" + ) + + _test_stream_thread(main_dedicated_stream) + else: + main_default_stream = torch.cuda.default_stream() + main_initial_stream = current_stream() + + assert main_initial_stream == main_default_stream, ( + "First call to current_stream should return default stream on CUDA" + ) + + _test_stream_thread(main_default_stream)