# Patch summary (leading text before the first "diff --git" header):
#   * .buildkite/test-pipeline.yaml
#       - Renames the step "GPT-OSS Eval (Blackwell)" to "Blackwell GPT-OSS Eval"
#         and changes the note on `optional: true` to "run on nightlies".
#       - Adds an optional b200 step "Blackwell LM Eval Small Models" (75 min
#         timeout) that runs evals/gsm8k/test_gsm8k_correctness.py with
#         --config-list-file=configs/models-blackwell.txt and --tp-size=1.
#   * tests/evals/gsm8k/configs/models-blackwell.txt
#       - New file listing the four model config names used by that step.
# NOTE(review): line breaks and YAML indentation were reconstructed from the
# hunk line counts; confirm context-line whitespace against the target file.
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 771756a42f402..d11a43377548c 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -835,11 +835,11 @@ steps:
   - pytest -v -s tests/kernels/moe/test_flashinfer.py
   - pytest -v -s tests/compile/test_silu_mul_quant_fusion.py
 
-- label: GPT-OSS Eval (Blackwell)
+- label: Blackwell GPT-OSS Eval
   timeout_in_minutes: 60
   working_dir: "/vllm-workspace/"
   gpu: b200
-  optional: true # disable while debugging
+  optional: true # run on nightlies
   source_file_dependencies:
   - tests/evals/gpt_oss
   - vllm/model_executor/models/gpt_oss.py
@@ -866,6 +866,16 @@ steps:
   commands:
   - pytest -s -v tests/quantization/test_blackwell_moe.py
 
+- label: Blackwell LM Eval Small Models
+  timeout_in_minutes: 75
+  gpu: b200
+  optional: true # run on nightlies
+  source_file_dependencies:
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  commands:
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt --tp-size=1
+
 ##### 1 GPU test #####
 ##### multi gpus test #####
 
diff --git a/tests/evals/gsm8k/configs/models-blackwell.txt b/tests/evals/gsm8k/configs/models-blackwell.txt
new file mode 100644
index 0000000000000..e577645d60d6f
--- /dev/null
+++ b/tests/evals/gsm8k/configs/models-blackwell.txt
@@ -0,0 +1,4 @@
+Qwen3-0.6B-FP8.yaml
+Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
+Qwen1.5-MoE-W4A16-CT.yaml
+DeepSeek-V2-Lite-Instruct-FP8.yaml