diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh index 3212b660ec356..a394046d2c8fe 100755 --- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh +++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh @@ -150,7 +150,7 @@ run_and_track_test 9 "test_multimodal.py" \ run_and_track_test 10 "test_pallas.py" \ "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py" run_and_track_test 11 "test_struct_output_generate.py" \ - "python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py" + "python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py -k 'not test_structured_output_with_reasoning_matrices'" run_and_track_test 12 "test_moe_pallas.py" \ "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py" run_and_track_test 13 "test_lora.py" \ diff --git a/tests/v1/tpu/test_spmd_model_weight_loading.py b/tests/v1/tpu/test_spmd_model_weight_loading.py index d36edfc3fb618..916325e41b922 100644 --- a/tests/v1/tpu/test_spmd_model_weight_loading.py +++ b/tests/v1/tpu/test_spmd_model_weight_loading.py @@ -45,11 +45,14 @@ def _get_spmd_mesh(): return MESH -@pytest.mark.parametrize("model", [ - "Qwen/Qwen2-1.5B-Instruct", - "meta-llama/Llama-3.1-8B-Instruct", - "meta-llama/Llama-3.1-70B-Instruct", -]) +@pytest.mark.parametrize( + "model", + [ + "Qwen/Qwen2-1.5B-Instruct", + # Skip large models due to CI runner disk space limitations + # "meta-llama/Llama-3.1-8B-Instruct", + # "meta-llama/Llama-3.1-70B-Instruct", + ]) def test_tpu_model_loader(model): # Skip the 70B test if there are less than 8 chips # TODO: Query using torch xla API, the query API is not working