diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 2bf0b6fd9a169..a7fe200559305 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -82,7 +82,7 @@ steps: - bash standalone_tests/python_only_compile.sh - label: Basic Correctness Test # 30min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] fast_check: true torch_nightly: true source_file_dependencies: @@ -99,7 +99,7 @@ steps: - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py - label: Chunked Prefill Test - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ - tests/basic_correctness/test_chunked_prefill @@ -108,7 +108,7 @@ steps: - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py - label: Core Test # 10min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] fast_check: true source_file_dependencies: - vllm/core @@ -209,7 +209,7 @@ steps: - pytest -v -s distributed/test_eplb_execute.py - label: Metrics, Tracing Test # 10min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] num_gpus: 2 source_file_dependencies: - vllm/ @@ -228,7 +228,7 @@ steps: ##### 1 GPU test ##### - label: Regression Test # 5min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ - tests/test_regression @@ -280,7 +280,7 @@ steps: - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine - label: Examples Test # 25min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] working_dir: "/vllm-workspace/examples" source_file_dependencies: - vllm/entrypoints @@ -305,7 +305,7 @@ steps: - VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2 - label: Prefix Caching Test # 9min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ - tests/prefix_caching @@ -314,7 +314,7 @@ steps: - label: Platform Tests (CUDA) - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ - tests/cuda @@ -355,7 +355,7 @@ steps: - pytest -v -s compile/test_async_tp.py - label: PyTorch Fullgraph Smoke Test # 9min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] torch_nightly: true source_file_dependencies: - vllm/ @@ -368,7 +368,7 @@ steps: - pytest -v -s compile/piecewise/test_full_cudagraph.py - label: PyTorch Fullgraph Test # 18min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] torch_nightly: true source_file_dependencies: - vllm/ @@ -377,7 +377,7 @@ steps: - pytest -v -s compile/test_full_graph.py - label: Kernels Core Operation Test - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - csrc/ - tests/kernels/core @@ -416,7 +416,7 @@ steps: parallelism: 2 - label: Kernels Mamba Test - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - csrc/mamba/ - tests/kernels/mamba @@ -424,7 +424,7 @@ steps: - pytest -v -s kernels/mamba - label: Tensorizer Test # 11min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] soft_fail: true source_file_dependencies: - vllm/model_executor/model_loader @@ -437,7 +437,7 @@ steps: - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - label: Model Executor Test - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/model_executor - tests/model_executor @@ -447,7 +447,7 @@ steps: - pytest -v -s model_executor - label: Benchmarks # 9min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] working_dir: "/vllm-workspace/.buildkite" source_file_dependencies: - benchmarks/ @@ -455,7 +455,7 @@ steps: - bash scripts/run-benchmarks.sh - label: Benchmarks CLI Test # 10min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ - tests/benchmarks/ @@ -494,7 +494,7 @@ steps: - pytest -s entrypoints/openai/correctness/ - label: Encoder Decoder tests # 5min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ - tests/encoder_decoder @@ -502,7 +502,7 @@ steps: - pytest -v -s encoder_decoder - label: OpenAI-Compatible Tool Use # 20 min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] fast_check: false source_file_dependencies: - vllm/ @@ -623,7 +623,7 @@ steps: # This test is used only in PR development phase to test individual models and should never run on main - label: Custom Models Test - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] optional: true commands: - echo 'Testing custom models...' @@ -658,7 +658,7 @@ steps: ##### multi gpus test ##### - label: Distributed Comm Ops Test # 7min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] working_dir: "/vllm-workspace/tests" num_gpus: 2 source_file_dependencies: @@ -755,7 +755,7 @@ steps: - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins - label: Multi-step Tests (4 GPUs) # 36min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] working_dir: "/vllm-workspace/tests" num_gpus: 4 source_file_dependencies: @@ -776,7 +776,7 @@ steps: - pytest -v -s multi_step/test_correctness_llm.py - label: Pipeline Parallelism Test # 45min - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] working_dir: "/vllm-workspace/tests" num_gpus: 4 source_file_dependencies: @@ -790,7 +790,7 @@ steps: - pytest -v -s distributed/test_pipeline_parallel.py - label: LoRA TP Test (Distributed) - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] num_gpus: 4 source_file_dependencies: - vllm/lora