AMD conditional all test execution // new test groups (#17556)

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu>
2026-03-16 11:57:14 +08:00 · 2025-05-09 17:35:58 -05:00 · 2025-05-09 17:35:58 -05:00 · 3b602cdea7
commit 3b602cdea7
parent 4b2ed7926a
3 changed files with 69 additions and 25 deletions
--- a/.buildkite/scripts/hardware_ci/run-amd-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh
@ -3,6 +3,9 @@
 # This script runs test inside the corresponding ROCm docker container.
 set -o pipefail

+# Add the parent directory to Python's module search path
+export PYTHONPATH=".."
+
 # Print ROCm version
 echo "--- Confirming Clean Initial State"
 while true; do
@ -74,6 +77,15 @@ HF_MOUNT="/root/.cache/huggingface"

 commands=$@
 echo "Commands:$commands"
+
+if [[ $commands == *"pytest -v -s basic_correctness/test_basic_correctness.py"* ]]; then
+  commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s basic_correctness/test_basic_correctness.py"}
+fi
+
+if [[ $commands == *"pytest -v -s compile/test_basic_correctness.py"* ]]; then
+  commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s compile/test_basic_correctness.py"}
+fi
+
 # Ignore certain kernel tests
 if [[ $commands == *" kernels/core"* ]]; then
  commands="${commands} \
@ -161,6 +173,8 @@ fi


 PARALLEL_JOB_COUNT=8
+MYPYTHONPATH=".."
+
 # Check whether the command contains a shard flag; if so, run all shards in parallel because the host has 8 GPUs.
 if [[ $commands == *"--shard-id="* ]]; then
  # Set the job count to the number of shards used
@ -181,6 +195,7 @@ if [[ $commands == *"--shard-id="* ]]; then
        -e AWS_SECRET_ACCESS_KEY \
        -v "${HF_CACHE}:${HF_MOUNT}" \
        -e "HF_HOME=${HF_MOUNT}" \
+        -e "PYTHONPATH=${MYPYTHONPATH}" \
        --name "${container_name}_${GPU}" \
        "${image_name}" \
        /bin/bash -c "${commands_gpu}" \
@ -211,6 +226,7 @@ else
          -e AWS_SECRET_ACCESS_KEY \
          -v "${HF_CACHE}:${HF_MOUNT}" \
          -e "HF_HOME=${HF_MOUNT}" \
+          -e "PYTHONPATH=${MYPYTHONPATH}" \
          --name "${container_name}" \
          "${image_name}" \
          /bin/bash -c "${commands}"
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@ -32,6 +32,7 @@ steps:
 ##### fast check tests  #####

 - label: Documentation Build # 2min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/test_docs/docs"
  fast_check: true
  no_gpu: True
@ -42,6 +43,7 @@ steps:
  - grep \"sig sig-object py\" build/html/api/vllm/vllm.sampling_params.html

 - label: Async Engine, Inputs, Utils, Worker Test # 24min
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/
  - tests/mq_llm_engine
@ -62,6 +64,7 @@ steps:
  - pytest -v -s worker # Worker

 - label: Python-only Installation Test
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - tests/standalone_tests/python_only_compile.sh
  - setup.py
@ -69,7 +72,7 @@ steps:
  - bash standalone_tests/python_only_compile.sh

 - label: Basic Correctness Test # 30min
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  fast_check: true
  torch_nightly: true
  source_file_dependencies:
@ -86,6 +89,7 @@ steps:
  - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py

 - label: Chunked Prefill Test
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/
  - tests/basic_correctness/test_chunked_prefill
@ -94,7 +98,7 @@ steps:
  - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py

 - label: Core Test # 10min
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  fast_check: true
  source_file_dependencies:
  - vllm/core
@ -104,10 +108,10 @@ steps:
  - pytest -v -s core

 - label: Entrypoints Test # 40min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/tests"
  fast_check: true
  torch_nightly: true
-  #mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
  - tests/entrypoints/llm
@ -126,6 +130,7 @@ steps:
  - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

 - label: Distributed Tests (4 GPUs) # 10min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  source_file_dependencies:
@ -158,7 +163,7 @@ steps:
  - popd

 - label: Metrics, Tracing Test # 10min
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  num_gpus: 2
  source_file_dependencies:
  - vllm/
@ -172,7 +177,7 @@ steps:
 #####  1 GPU test  #####

 - label: Regression Test # 5min
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  source_file_dependencies:
  - vllm/
  - tests/test_regression
@ -182,7 +187,7 @@ steps:
  working_dir: "/vllm-workspace/tests" # optional

 - label: Engine Test # 10min
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  source_file_dependencies:
  - vllm/
  - tests/engine
@ -196,7 +201,7 @@ steps:
  - pytest -v -s tokenization

 - label: V1 Test
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
    - vllm/
    - tests/v1
@ -221,8 +226,8 @@ steps:
    - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine

 - label: Examples Test # 25min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/examples"
-  #mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/entrypoints
  - examples/
@ -246,7 +251,7 @@ steps:
    - VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2

 - label: Prefix Caching Test # 9min
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  source_file_dependencies:
  - vllm/
  - tests/prefix_caching
@ -254,6 +259,7 @@ steps:
    - pytest -v -s prefix_caching

 - label: Samplers Test # 36min
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/model_executor/layers
  - vllm/sampling_metadata.py
@ -264,7 +270,7 @@ steps:
    - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers

 - label: LogitsProcessor Test # 5min
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  source_file_dependencies:
  - vllm/model_executor/layers
  - vllm/model_executor/guided_decoding
@ -275,6 +281,7 @@ steps:
    - pytest -v -s model_executor/test_guided_processors.py

 - label: Speculative decoding tests # 40min
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/spec_decode
  - tests/spec_decode
@ -285,7 +292,7 @@ steps:
    - pytest -v -s spec_decode/e2e/test_eagle_correctness.py

 - label: LoRA Test %N # 15min each
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/lora
  - tests/lora
@ -293,6 +300,7 @@ steps:
  parallelism: 4

 - label: PyTorch Compilation Unit Tests
+  mirror_hardwares: [amdexperimental, amdproduction]
  torch_nightly: true
  source_file_dependencies:
    - vllm/
@ -303,6 +311,7 @@ steps:
    - pytest -v -s compile/test_sequence_parallelism.py

 - label: PyTorch Fullgraph Smoke Test # 9min
+  mirror_hardwares: [amdexperimental, amdproduction]
  torch_nightly: true
  source_file_dependencies:
  - vllm/
@ -314,6 +323,7 @@ steps:
  - pytest -v -s compile/piecewise/test_toy_llama.py

 - label: PyTorch Fullgraph Test # 18min
+  mirror_hardwares: [amdexperimental, amdproduction]
  torch_nightly: true
  source_file_dependencies:
  - vllm/
@ -322,7 +332,7 @@ steps:
  - pytest -v -s compile/test_full_graph.py

 - label: Kernels Core Operation Test
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  source_file_dependencies:
  - csrc/
  - tests/kernels/core
@ -330,7 +340,7 @@ steps:
    - pytest -v -s kernels/core

 - label: Kernels Attention Test %N
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  source_file_dependencies:
  - csrc/attention/
  - vllm/attention
@ -341,7 +351,7 @@ steps:
  parallelism: 2

 - label: Kernels Quantization Test %N
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  source_file_dependencies:
  - csrc/quantization/
  - vllm/model_executor/layers/quantization
@ -351,7 +361,7 @@ steps:
  parallelism: 2

 - label: Kernels MoE Test
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - csrc/moe/
  - tests/kernels/moe
@ -360,7 +370,7 @@ steps:
    - pytest -v -s kernels/moe

 - label: Kernels Mamba Test
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - csrc/mamba/
  - tests/kernels/mamba
@ -368,7 +378,7 @@ steps:
    - pytest -v -s kernels/mamba

 - label: Tensorizer Test # 11min
-  # mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  soft_fail: true
  source_file_dependencies:
  - vllm/model_executor/model_loader
@ -379,14 +389,15 @@ steps:
    - pytest -v -s tensorizer_loader

 - label: Benchmarks # 9min
+  mirror_hardwares: [amdexperimental, amdproduction]
  working_dir: "/vllm-workspace/.buildkite"
-  mirror_hardwares: [amd]
  source_file_dependencies:
  - benchmarks/
  commands:
  - bash scripts/run-benchmarks.sh

 - label: Benchmarks CLI Test # 10min
+  mirror_hardwares: [amdexperimental, amdproduction]
  source_file_dependencies:
  - vllm/
  - tests/benchmarks/
@ -394,6 +405,7 @@ steps:
  - pytest -v -s benchmarks/

 - label: Quantization Test
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - csrc/
  - vllm/model_executor/layers/quantization
@ -402,6 +414,7 @@ steps:
  - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization

 - label: LM Eval Small Models # 53min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
  source_file_dependencies:
  - csrc/
@ -411,6 +424,7 @@ steps:
  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-small.txt --tp-size=1

 - label: OpenAI API correctness
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - csrc/
  - vllm/entrypoints/openai/
@ -419,6 +433,7 @@ steps:
  - pytest -s entrypoints/openai/correctness/

 - label: Encoder Decoder tests # 5min
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/
  - tests/encoder_decoder
@ -426,8 +441,8 @@ steps:
    - pytest -v -s encoder_decoder

 - label: OpenAI-Compatible Tool Use # 20 min
+  mirror_hardwares: [amdexperimental]
  fast_check: false
-  #mirror_hardwares: [ amd ]
  source_file_dependencies:
    - vllm/
    - tests/tool_use
@ -439,6 +454,7 @@ steps:
 #####  models test  #####

 - label: Basic Models Test # 24min
+  mirror_hardwares: [amdexperimental]
  torch_nightly: true
  source_file_dependencies:
  - vllm/
@ -454,7 +470,7 @@ steps:
    - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'

 - label: Language Models Test (Standard)
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/
  - tests/models/language
@ -464,6 +480,7 @@ steps:
    - pytest -v -s models/language -m core_model

 - label: Language Models Test (Extended)
+  mirror_hardwares: [amdexperimental]
  optional: true
  source_file_dependencies:
  - vllm/
@ -474,7 +491,7 @@ steps:
    - pytest -v -s models/language -m 'not core_model'

 - label: Multi-Modal Models Test (Standard)
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
@ -485,6 +502,7 @@ steps:
    - cd .. && pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model  # Otherwise, mp_method="spawn" doesn't work

 - label: Multi-Modal Models Test (Extended) 1
+  mirror_hardwares: [amdexperimental]
  optional: true
  source_file_dependencies:
  - vllm/
@ -494,6 +512,7 @@ steps:
    - pytest -v -s --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing models/multimodal -m 'not core_model'

 - label: Multi-Modal Models Test (Extended) 2
+  mirror_hardwares: [amdexperimental]
  optional: true
  source_file_dependencies:
  - vllm/
@ -503,6 +522,7 @@ steps:
    - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'

 - label: Multi-Modal Models Test (Extended) 3
+  mirror_hardwares: [amdexperimental]
  optional: true
  source_file_dependencies:
  - vllm/
@ -512,7 +532,7 @@ steps:
    - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'

 - label: Quantized Models Test
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
  - vllm/model_executor/layers/quantization
  - tests/models/quantization
@ -521,7 +541,7 @@ steps:

 # This test is used only during the PR development phase to test individual models and should never run on main.
 - label: Custom Models Test
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  optional: true
  commands:
    - echo 'Testing custom models...'
@ -533,7 +553,7 @@ steps:
 #####  multi gpus test  #####

 - label: Distributed Comm Ops Test # 7min
-  mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental, amdproduction]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
  source_file_dependencies:
@ -544,6 +564,7 @@ steps:
  - pytest -v -s distributed/test_shm_broadcast.py

 - label: 2 Node Tests (4 GPUs in total) # 16min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
  num_nodes: 2
@ -562,7 +583,7 @@ steps:
    - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'

 - label: Distributed Tests (2 GPUs) # 40min
-  #mirror_hardwares: [amd]
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
  source_file_dependencies:
@ -599,6 +620,7 @@ steps:
  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown

 - label: Plugin Tests (2 GPUs) # 40min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
  source_file_dependencies:
@ -618,6 +640,7 @@ steps:
  - pytest -v -s models/test_oot_registration.py # it needs a clean process

 - label: Multi-step Tests (4 GPUs) # 36min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  source_file_dependencies:
@ -638,6 +661,7 @@ steps:
  - pytest -v -s multi_step/test_correctness_llm.py

 - label: Pipeline Parallelism Test # 45min
+  mirror_hardwares: [amdexperimental, amdproduction]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  source_file_dependencies:
@ -651,6 +675,7 @@ steps:
  - pytest -v -s distributed/test_pipeline_parallel.py

 - label: LoRA TP Test (Distributed)
+  mirror_hardwares: [amdexperimental, amdproduction]
  num_gpus: 4
  source_file_dependencies:
  - vllm/lora
@ -666,6 +691,7 @@ steps:


 - label: Weight Loading Multiple GPU Test  # 33min
+  mirror_hardwares: [amdexperimental]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
  source_file_dependencies:
--- a/requirements/rocm-test.txt
+++ b/requirements/rocm-test.txt
@ -1,3 +1,5 @@
+# Common dependencies
+-r common.txt

 # entrypoints test
 # librosa==0.10.2.post1 # required by audio tests in entrypoints/openai