[ci] breaks down V1 Test into 3 groups of approx 30 minutes runtime (#23757)

Signed-off-by: Jean Schmidt <contato@jschmidt.me>
This commit is contained in:
Jean Schmidt 2025-08-28 17:59:19 +02:00 committed by GitHub
parent db74d60490
commit 0583578f42
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -234,7 +234,26 @@ steps:
# OOM in the CI unless we run this separately
- pytest -v -s tokenization
- label: V1 Test
- label: V1 Test e2e + engine
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/v1
commands:
# TODO: accuracy does not match, whether setting
# VLLM_USE_FLASHINFER_SAMPLER or not on H100.
- pytest -v -s v1/e2e
- pytest -v -s v1/engine
- label: V1 Test entrypoints
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/v1
commands:
- pytest -v -s v1/entrypoints
- label: V1 Test others
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
@ -242,8 +261,6 @@ steps:
commands:
# split the test to avoid interference
- pytest -v -s v1/core
- pytest -v -s v1/engine
- pytest -v -s v1/entrypoints
- pytest -v -s v1/executor
- pytest -v -s v1/sample
- pytest -v -s v1/logits_processors
@ -256,9 +273,6 @@ steps:
- pytest -v -s v1/test_utils.py
- pytest -v -s v1/test_oracle.py
- pytest -v -s v1/test_metrics_reader.py
# TODO: accuracy does not match, whether setting
# VLLM_USE_FLASHINFER_SAMPLER or not on H100.
- pytest -v -s v1/e2e
# Integration test for streaming correctness (requires special branch).
- pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
- pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine