AMD: conditional execution of all tests // new test groups (#17556)

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu>
Author: Alexei-V-Ivanov-AMD · 2025-05-09 17:35:58 -05:00 · committed by GitHub
parent 4b2ed7926a
commit 3b602cdea7
3 changed files with 69 additions and 25 deletions


@@ -3,6 +3,9 @@
# This script runs tests inside the corresponding ROCm docker container.
set -o pipefail
# Export Python path
export PYTHONPATH=".."
# Print ROCm version
echo "--- Confirming Clean Initial State"
while true; do
@@ -74,6 +77,15 @@ HF_MOUNT="/root/.cache/huggingface"
commands=$@
echo "Commands:$commands"
if [[ $commands == *"pytest -v -s basic_correctness/test_basic_correctness.py"* ]]; then
commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s basic_correctness/test_basic_correctness.py"}
fi
if [[ $commands == *"pytest -v -s compile/test_basic_correctness.py"* ]]; then
commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s compile/test_basic_correctness.py"}
fi
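
Note: the rewrites above rely on bash pattern substitution to prepend an environment override to specific pytest invocations. A minimal standalone sketch of the pattern (illustrative command string, not the script's full command list):

#!/bin/bash
# Sketch: prepend an env override to a matching pytest invocation via
# ${var//pattern/replacement}, which replaces every occurrence of pattern.
target="pytest -v -s basic_correctness/test_basic_correctness.py"
commands="cd tests && $target"
if [[ $commands == *"$target"* ]]; then
  commands=${commands//"$target"/"VLLM_USE_TRITON_FLASH_ATTN=0 $target"}
fi
echo "$commands"
# prints: cd tests && VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s basic_correctness/test_basic_correctness.py
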
# Ignore certain kernels tests
if [[ $commands == *" kernels/core"* ]]; then
commands="${commands} \
@@ -161,6 +173,8 @@ fi
PARALLEL_JOB_COUNT=8
MYPYTHONPATH=".."
# Check if the command contains the shard flag; if so, run all shards in parallel because the host has 8 GPUs.
if [[ $commands == *"--shard-id="* ]]; then
# assign job count as the number of shards used
@@ -181,6 +195,7 @@ if [[ $commands == *"--shard-id="* ]]; then
-e AWS_SECRET_ACCESS_KEY \
-v "${HF_CACHE}:${HF_MOUNT}" \
-e "HF_HOME=${HF_MOUNT}" \
-e "PYTHONPATH=${MYPYTHONPATH}" \
--name "${container_name}_${GPU}" \
"${image_name}" \
/bin/bash -c "${commands_gpu}" \
@@ -211,6 +226,7 @@ else
-e AWS_SECRET_ACCESS_KEY \
-v "${HF_CACHE}:${HF_MOUNT}" \
-e "HF_HOME=${HF_MOUNT}" \
-e "PYTHONPATH=${MYPYTHONPATH}" \
--name "${container_name}" \
"${image_name}" \
/bin/bash -c "${commands}"
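
Note: when the command string carries a --shard-id= flag, the script fans the shards out across the host's 8 GPUs, one container per shard; the hunks above show only fragments of that branch. A simplified, self-contained sketch of the fan-out, assuming image_name, container_name, HF_CACHE, HF_MOUNT, and commands are set as in the script; the docker device flags here are illustrative ROCm defaults, not the script's exact set:

#!/bin/bash
# Sketch: launch one container per shard in parallel, then fail the
# job if any shard failed.
PARALLEL_JOB_COUNT=8
PIDS=()
for GPU in $(seq 0 $((PARALLEL_JOB_COUNT - 1))); do
  # Hand each shard its own index; total shard count = GPU count.
  # (The real script substitutes these into an existing --shard-id=
  # flag; here they are simply appended for brevity.)
  commands_gpu="${commands} --shard-id=${GPU} --num-shards=${PARALLEL_JOB_COUNT}"
  docker run --rm \
    --device /dev/kfd --device "/dev/dri/renderD$((128 + GPU))" \
    -v "${HF_CACHE}:${HF_MOUNT}" \
    -e "HF_HOME=${HF_MOUNT}" \
    --name "${container_name}_${GPU}" \
    "${image_name}" \
    /bin/bash -c "${commands_gpu}" &
  PIDS+=($!)
done
STATUS=0
for pid in "${PIDS[@]}"; do
  wait "$pid" || STATUS=1   # remember any non-zero exit
done
exit $STATUS
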


@@ -32,6 +32,7 @@ steps:
##### fast check tests #####
- label: Documentation Build # 2min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/test_docs/docs"
fast_check: true
no_gpu: True
@@ -42,6 +43,7 @@ steps:
- grep \"sig sig-object py\" build/html/api/vllm/vllm.sampling_params.html
- label: Async Engine, Inputs, Utils, Worker Test # 24min
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/mq_llm_engine
@@ -62,6 +64,7 @@ steps:
- pytest -v -s worker # Worker
- label: Python-only Installation Test
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- tests/standalone_tests/python_only_compile.sh
- setup.py
@@ -69,7 +72,7 @@ steps:
- bash standalone_tests/python_only_compile.sh
- label: Basic Correctness Test # 30min
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
fast_check: true
torch_nightly: true
source_file_dependencies:
@@ -86,6 +89,7 @@ steps:
- VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
- label: Chunked Prefill Test
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/basic_correctness/test_chunked_prefill
@@ -94,7 +98,7 @@ steps:
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
- label: Core Test # 10min
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
fast_check: true
source_file_dependencies:
- vllm/core
@@ -104,10 +108,10 @@ steps:
- pytest -v -s core
- label: Entrypoints Test # 40min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/tests"
fast_check: true
torch_nightly: true
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
- tests/entrypoints/llm
@@ -126,6 +130,7 @@ steps:
- VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
- label: Distributed Tests (4 GPUs) # 10min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/tests"
num_gpus: 4
source_file_dependencies:
@@ -158,7 +163,7 @@ steps:
- popd
- label: Metrics, Tracing Test # 10min
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
num_gpus: 2
source_file_dependencies:
- vllm/
@@ -172,7 +177,7 @@ steps:
##### 1 GPU test #####
- label: Regression Test # 5min
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
source_file_dependencies:
- vllm/
- tests/test_regression
@@ -182,7 +187,7 @@ steps:
working_dir: "/vllm-workspace/tests" # optional
- label: Engine Test # 10min
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
source_file_dependencies:
- vllm/
- tests/engine
@@ -196,7 +201,7 @@ steps:
- pytest -v -s tokenization
- label: V1 Test
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/v1
@@ -221,8 +226,8 @@ steps:
- pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
- label: Examples Test # 25min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/examples"
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/entrypoints
- examples/
@@ -246,7 +251,7 @@ steps:
- VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
- label: Prefix Caching Test # 9min
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
source_file_dependencies:
- vllm/
- tests/prefix_caching
@@ -254,6 +259,7 @@ steps:
- pytest -v -s prefix_caching
- label: Samplers Test # 36min
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/model_executor/layers
- vllm/sampling_metadata.py
@@ -264,7 +270,7 @@ steps:
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
- label: LogitsProcessor Test # 5min
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
source_file_dependencies:
- vllm/model_executor/layers
- vllm/model_executor/guided_decoding
@@ -275,6 +281,7 @@ steps:
- pytest -v -s model_executor/test_guided_processors.py
- label: Speculative decoding tests # 40min
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/spec_decode
- tests/spec_decode
@@ -285,7 +292,7 @@ steps:
- pytest -v -s spec_decode/e2e/test_eagle_correctness.py
- label: LoRA Test %N # 15min each
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/lora
- tests/lora
@@ -293,6 +300,7 @@ steps:
parallelism: 4
- label: PyTorch Compilation Unit Tests
mirror_hardwares: [amdexperimental, amdproduction]
torch_nightly: true
source_file_dependencies:
- vllm/
@@ -303,6 +311,7 @@ steps:
- pytest -v -s compile/test_sequence_parallelism.py
- label: PyTorch Fullgraph Smoke Test # 9min
mirror_hardwares: [amdexperimental, amdproduction]
torch_nightly: true
source_file_dependencies:
- vllm/
@@ -314,6 +323,7 @@ steps:
- pytest -v -s compile/piecewise/test_toy_llama.py
- label: PyTorch Fullgraph Test # 18min
mirror_hardwares: [amdexperimental, amdproduction]
torch_nightly: true
source_file_dependencies:
- vllm/
@@ -322,7 +332,7 @@ steps:
- pytest -v -s compile/test_full_graph.py
- label: Kernels Core Operation Test
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
source_file_dependencies:
- csrc/
- tests/kernels/core
@@ -330,7 +340,7 @@ steps:
- pytest -v -s kernels/core
- label: Kernels Attention Test %N
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
source_file_dependencies:
- csrc/attention/
- vllm/attention
@@ -341,7 +351,7 @@ steps:
parallelism: 2
- label: Kernels Quantization Test %N
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
source_file_dependencies:
- csrc/quantization/
- vllm/model_executor/layers/quantization
@@ -351,7 +361,7 @@ steps:
parallelism: 2
- label: Kernels MoE Test
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- csrc/moe/
- tests/kernels/moe
@@ -360,7 +370,7 @@ steps:
- pytest -v -s kernels/moe
- label: Kernels Mamba Test
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- csrc/mamba/
- tests/kernels/mamba
@@ -368,7 +378,7 @@ steps:
- pytest -v -s kernels/mamba
- label: Tensorizer Test # 11min
# mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
soft_fail: true
source_file_dependencies:
- vllm/model_executor/model_loader
@@ -379,14 +389,15 @@ steps:
- pytest -v -s tensorizer_loader
- label: Benchmarks # 9min
mirror_hardwares: [amdexperimental, amdproduction]
working_dir: "/vllm-workspace/.buildkite"
mirror_hardwares: [amd]
source_file_dependencies:
- benchmarks/
commands:
- bash scripts/run-benchmarks.sh
- label: Benchmarks CLI Test # 10min
mirror_hardwares: [amdexperimental, amdproduction]
source_file_dependencies:
- vllm/
- tests/benchmarks/
@@ -394,6 +405,7 @@ steps:
- pytest -v -s benchmarks/
- label: Quantization Test
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
@@ -402,6 +414,7 @@ steps:
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
- label: LM Eval Small Models # 53min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
source_file_dependencies:
- csrc/
@@ -411,6 +424,7 @@ steps:
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-small.txt --tp-size=1
- label: OpenAI API correctness
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- csrc/
- vllm/entrypoints/openai/
@@ -419,6 +433,7 @@ steps:
- pytest -s entrypoints/openai/correctness/
- label: Encoder Decoder tests # 5min
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/encoder_decoder
@@ -426,8 +441,8 @@ steps:
- pytest -v -s encoder_decoder
- label: OpenAI-Compatible Tool Use # 20 min
mirror_hardwares: [amdexperimental]
fast_check: false
#mirror_hardwares: [ amd ]
source_file_dependencies:
- vllm/
- tests/tool_use
@@ -439,6 +454,7 @@ steps:
##### models test #####
- label: Basic Models Test # 24min
mirror_hardwares: [amdexperimental]
torch_nightly: true
source_file_dependencies:
- vllm/
@@ -454,7 +470,7 @@ steps:
- VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'
- label: Language Models Test (Standard)
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/models/language
@@ -464,6 +480,7 @@ steps:
- pytest -v -s models/language -m core_model
- label: Language Models Test (Extended)
mirror_hardwares: [amdexperimental]
optional: true
source_file_dependencies:
- vllm/
@@ -474,7 +491,7 @@ steps:
- pytest -v -s models/language -m 'not core_model'
- label: Multi-Modal Models Test (Standard)
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/models/multimodal
@@ -485,6 +502,7 @@ steps:
- cd .. && pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
- label: Multi-Modal Models Test (Extended) 1
mirror_hardwares: [amdexperimental]
optional: true
source_file_dependencies:
- vllm/
@@ -494,6 +512,7 @@ steps:
- pytest -v -s --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing models/multimodal -m 'not core_model'
- label: Multi-Modal Models Test (Extended) 2
mirror_hardwares: [amdexperimental]
optional: true
source_file_dependencies:
- vllm/
@@ -503,6 +522,7 @@ steps:
- pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'
- label: Multi-Modal Models Test (Extended) 3
mirror_hardwares: [amdexperimental]
optional: true
source_file_dependencies:
- vllm/
@@ -512,7 +532,7 @@ steps:
- pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
- label: Quantized Models Test
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/model_executor/layers/quantization
- tests/models/quantization
@@ -521,7 +541,7 @@ steps:
# This test is used only in the PR development phase to test individual models and should never run on main
- label: Custom Models Test
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
optional: true
commands:
- echo 'Testing custom models...'
@@ -533,7 +553,7 @@ steps:
##### multi gpus test #####
- label: Distributed Comm Ops Test # 7min
mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental, amdproduction]
working_dir: "/vllm-workspace/tests"
num_gpus: 2
source_file_dependencies:
@@ -544,6 +564,7 @@ steps:
- pytest -v -s distributed/test_shm_broadcast.py
- label: 2 Node Tests (4 GPUs in total) # 16min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/tests"
num_gpus: 2
num_nodes: 2
@@ -562,7 +583,7 @@ steps:
- VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'
- label: Distributed Tests (2 GPUs) # 40min
#mirror_hardwares: [amd]
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/tests"
num_gpus: 2
source_file_dependencies:
@@ -599,6 +620,7 @@ steps:
- CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
- label: Plugin Tests (2 GPUs) # 40min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/tests"
num_gpus: 2
source_file_dependencies:
@@ -618,6 +640,7 @@ steps:
- pytest -v -s models/test_oot_registration.py # it needs a clean process
- label: Multi-step Tests (4 GPUs) # 36min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/tests"
num_gpus: 4
source_file_dependencies:
@@ -638,6 +661,7 @@ steps:
- pytest -v -s multi_step/test_correctness_llm.py
- label: Pipeline Parallelism Test # 45min
mirror_hardwares: [amdexperimental, amdproduction]
working_dir: "/vllm-workspace/tests"
num_gpus: 4
source_file_dependencies:
@@ -651,6 +675,7 @@ steps:
- pytest -v -s distributed/test_pipeline_parallel.py
- label: LoRA TP Test (Distributed)
mirror_hardwares: [amdexperimental, amdproduction]
num_gpus: 4
source_file_dependencies:
- vllm/lora
@@ -666,6 +691,7 @@ steps:
- label: Weight Loading Multiple GPU Test # 33min
mirror_hardwares: [amdexperimental]
working_dir: "/vllm-workspace/tests"
num_gpus: 2
source_file_dependencies:
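
Note: the pattern in this pipeline file is uniform — each mirrored step now carries a mirror_hardwares key, with amdexperimental marking steps being trialed on AMD CI and amdexperimental, amdproduction marking the stable set that replaces the old amd tag. To audit which steps fall into each group, the file can be queried directly; a sketch, assuming the file is .buildkite/test-pipeline.yaml and that mikefarah's yq v4 is installed (both assumptions):

# Count steps per group with plain grep.
grep -c 'mirror_hardwares: \[amdexperimental\]' .buildkite/test-pipeline.yaml
grep -c 'mirror_hardwares: \[amdexperimental, amdproduction\]' .buildkite/test-pipeline.yaml

# List the labels of production-mirrored steps.
yq '.steps[] | select((.mirror_hardwares // []) | contains(["amdproduction"])) | .label' \
  .buildkite/test-pipeline.yaml
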


@@ -1,3 +1,5 @@
# Common dependencies
-r common.txt
# entrypoints test
# librosa==0.10.2.post1 # required by audio tests in entrypoints/openai