[ci] Sync test areas yaml file with test-pipeline (#30862)

Signed-off-by: Kevin H. Luu <khluu000@gmail.com>
This commit is contained in:
Kevin H. Luu 2025-12-17 02:30:56 -08:00 committed by GitHub
parent 9ad5b21710
commit 4bf6c23668
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 10 additions and 21 deletions

View File

@ -32,6 +32,7 @@ steps:
- label: Prime-RL Integration (2 GPUs)
timeout_in_minutes: 30
optional: true
soft_fail: true
num_gpus: 2
working_dir: "/vllm-workspace"
source_file_dependencies:
@ -39,21 +40,3 @@ steps:
- .buildkite/scripts/run-prime-rl-test.sh
commands:
- bash .buildkite/scripts/run-prime-rl-test.sh
- label: DeepSeek V2-Lite Async EPLB Accuracy
timeout_in_minutes: 60
gpu: h100
optional: true
num_gpus: 4
working_dir: "/vllm-workspace"
commands:
- bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_async_eplb.sh 0.25 1319 8030
- label: Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy
timeout_in_minutes: 60
gpu: h100
optional: true
num_gpus: 4
working_dir: "/vllm-workspace"
commands:
- bash .buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh 0.8 1319 8040

View File

@ -9,7 +9,7 @@ steps:
- vllm/model_executor/layers/quantization
autorun_on_main: true
commands:
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt --tp-size=1
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt
- label: LM Eval Large Models (4 GPUs)(A100)
gpu: a100
@ -43,4 +43,4 @@ steps:
- csrc/
- vllm/model_executor/layers/quantization
commands:
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt --tp-size=1
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt

View File

@ -22,6 +22,8 @@ steps:
# FIXIT: find out which code initialize cuda before running the test
# before the fix, we need to use spawn to test it
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
# Alot of these tests are on the edge of OOMing
- export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# There is some Tensor Parallelism related processing logic in LoRA that
# requires multi-GPU testing for validation.
- pytest -v -s -x lora/test_chatglm3_tp.py

View File

@ -9,6 +9,7 @@ steps:
source_file_dependencies:
- vllm/
- tests/models/test_initialization.py
- tests/models/registry.py
commands:
# Run a subset of model initialization tests
- pytest -v -s models/test_initialization.py::test_can_initialize_small_subset
@ -20,6 +21,7 @@ steps:
source_file_dependencies:
- vllm/model_executor/models/
- tests/models/test_initialization.py
- tests/models/registry.py
commands:
# Only when vLLM model source is modified - test initialization of a large
# subset of supported models (the complement of the small subset in the above

View File

@ -13,7 +13,9 @@ steps:
# tests covered elsewhere.
# Use `find` to launch multiple instances of pytest so that
# they do not suffer from https://github.com/vllm-project/vllm/issues/28965
- "find compile/ -maxdepth 1 -name 'test_*.py' -exec pytest -s -v {} \\;"
# However, find does not normally propagate error codes, so we combine it with xargs
# (using -0 for proper path handling)
- "find compile/ -maxdepth 1 -name 'test_*.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
- label: PyTorch Fullgraph Smoke Test
timeout_in_minutes: 30