From d65668b4e88008adccb91c6598c93c2d53487484 Mon Sep 17 00:00:00 2001
From: Concurrensee
Date: Fri, 13 Jun 2025 19:08:51 -0500
Subject: [PATCH] Adding "AMD: Multi-step Tests" to amdproduction. (#19508)

Signed-off-by: Yida Wu
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung
---
 .buildkite/test-pipeline.yaml            |  2 +-
 tests/multi_step/test_correctness_llm.py | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 364561c46c260..8f39862708689 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -675,7 +675,7 @@ steps:
   - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
 
 - label: Multi-step Tests (4 GPUs) # 36min
-  mirror_hardwares: [amdexperimental]
+  mirror_hardwares: [amdexperimental, amdproduction]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 4
   source_file_dependencies:
diff --git a/tests/multi_step/test_correctness_llm.py b/tests/multi_step/test_correctness_llm.py
index 9f1b3bbe8e226..0df00c98b72cf 100644
--- a/tests/multi_step/test_correctness_llm.py
+++ b/tests/multi_step/test_correctness_llm.py
@@ -8,6 +8,7 @@
 from typing import Optional
 
 import pytest
+from vllm.platforms import current_platform
 from vllm.utils import STR_BACKEND_ENV_VAR
 
 from ..models.utils import check_logprobs_close, check_outputs_equal
@@ -71,6 +72,12 @@ def test_multi_step_llm(
         num_logprobs: corresponds to the `logprobs` argument to the OpenAI
             completions endpoint; `None` -> 1 logprob returned.
     """
+    if current_platform.is_rocm() and \
+            (attention_backend == "FLASHINFER" or enable_chunked_prefill):
+        pytest.skip(
+            "Multi-Step with FLASHINFER or Chunked-Prefill is not supported "
+            "on ROCm")
+
     with monkeypatch.context() as m:
         m.setenv(STR_BACKEND_ENV_VAR, attention_backend)
 
@@ -221,6 +228,9 @@ def test_multi_step_llm_w_prompt_logprobs(
 @pytest.mark.parametrize("num_prompts", NUM_PROMPTS)
 @pytest.mark.parametrize("num_logprobs", [None, 5])
 @pytest.mark.parametrize("attention_backend", ["FLASH_ATTN"])
+@pytest.mark.skipif(
+    current_platform.is_rocm(),
+    reason="Multi-Step + Chunked-Prefill not supported on ROCm")
 def test_multi_step_llm_chunked_prefill_prefix_cache(
     vllm_runner,
     example_prompts,