diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index d5d4043a1d5bc..67088caa8150b 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -51,7 +51,7 @@ steps: - label: Async Engine, Inputs, Utils, Worker Test # 10min timeout_in_minutes: 15 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - agent_pool: mi325_1 + agent_pool: mi355_1 grade: Blocking source_file_dependencies: - vllm/ @@ -64,7 +64,7 @@ steps: - label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 15min timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - agent_pool: mi325_1 + agent_pool: mi355_1 grade: Blocking source_file_dependencies: - vllm/ @@ -99,7 +99,7 @@ steps: - label: Basic Correctness Test # 20min timeout_in_minutes: 30 mirror_hardwares: [amdexperimental, amdproduction] - agent_pool: mi325_1 + agent_pool: mi355_1 # grade: Blocking fast_check: true torch_nightly: true @@ -116,7 +116,7 @@ steps: - label: Entrypoints Unit Tests # 5min mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - agent_pool: mi325_1 + agent_pool: mi355_1 grade: Blocking timeout_in_minutes: 10 working_dir: "/vllm-workspace/tests" @@ -131,7 +131,7 @@ steps: - label: Entrypoints Integration Test (LLM) # 30min timeout_in_minutes: 40 mirror_hardwares: [amdexperimental, amdproduction] - agent_pool: mi325_1 + agent_pool: mi355_1 # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true @@ -254,7 +254,7 @@ steps: - label: EPLB Algorithm Test # 5min mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - agent_pool: mi325_1 + agent_pool: mi355_1 grade: Blocking timeout_in_minutes: 15 working_dir: "/vllm-workspace/tests" @@ -266,7 +266,7 @@ steps: - label: EPLB Execution Test # 10min mirror_hardwares: [amdexperimental, amdproduction] - agent_pool: mi325_4 + agent_pool: mi355_4 # grade: Blocking timeout_in_minutes: 20 working_dir: "/vllm-workspace/tests" @@ -281,7 +281,7 @@ steps: - label: Metrics, Tracing Test # 12min timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction] - agent_pool: mi325_2 + agent_pool: mi355_2 # grade: Blocking num_gpus: 2 source_file_dependencies: @@ -301,7 +301,7 @@ steps: - label: Regression Test # 7min timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - agent_pool: mi325_1 + agent_pool: mi355_1 grade: Blocking source_file_dependencies: - vllm/ @@ -343,7 +343,7 @@ steps: - label: V1 Test entrypoints # 35min timeout_in_minutes: 50 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - agent_pool: mi325_1 + agent_pool: mi355_1 grade: Blocking source_file_dependencies: - vllm/ @@ -544,7 +544,7 @@ steps: - label: PyTorch Fullgraph Test # 27min timeout_in_minutes: 40 mirror_hardwares: [amdexperimental, amdproduction] - agent_pool: mi325_1 + agent_pool: mi355_1 # grade: Blocking torch_nightly: true source_file_dependencies: @@ -715,6 +715,7 @@ steps: # we can only upgrade after this is resolved # TODO(jerryzh168): resolve the above comment - uv pip install --system torchao==0.13.0 + - uv pip install --system conch-triton-kernels - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py - label: LM Eval Small Models # 15min @@ -934,6 +935,18 @@ steps: commands: - pytest -v -s models/language/pooling_mteb_test +- label: Multi-Modal Processor Test (CPU) + timeout_in_minutes: 60 + mirror_hardwares: [amdexperimental] + agent_pool: mi325_1 + source_file_dependencies: + - vllm/ + - tests/models/multimodal + no_gpu: true + commands: + - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git + - pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py + - label: Multi-Modal Processor Test # 44min timeout_in_minutes: 60 mirror_hardwares: [amdexperimental] @@ -1472,14 +1485,14 @@ steps: working_dir: "/vllm-workspace/" num_gpus: 2 commands: - - pytest -v -s tests/compile/distributed/test_async_tp.py + - VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/distributed/test_async_tp.py - pytest -v -s tests/compile/distributed/test_sequence_parallelism.py - pytest -v -s tests/compile/distributed/test_fusion_all_reduce.py #- pytest -v -s tests/compile/distributed/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm - - "pytest -v -s tests/compile/distributed/test_fusions_e2e.py -k 'not Llama-4'" - - pytest -v -s tests/distributed/test_sequence_parallel.py + - "VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/distributed/test_fusions_e2e.py -k 'not Llama-4'" + - VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/distributed/test_sequence_parallel.py - pytest -v -s tests/distributed/test_context_parallel.py - - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048 + - HIP_VISIBLE_DEVICES=0,1 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048 - pytest -v -s tests/v1/distributed/test_dbo.py ##### B200 test #####