[Build/CI][DP/EP] Add QWen/Qwen3-30B-A3B-FP8 + EPLB tests to Nightly H100 and B200 (#29195)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
2026-03-16 11:47:09 +08:00 · 2025-11-24 11:06:17 -05:00 · 2025-11-24 11:06:17 -05:00 · e924bbb4f4
commit e924bbb4f4
parent 656516c315
3 changed files with 19 additions and 6 deletions
--- a/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh
+++ b/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh
@ -1,10 +1,12 @@
 #!/usr/bin/env bash
 set -euxo pipefail

-# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
+# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT] [DATA_PARALLEL_SIZE] [TENSOR_PARALLEL_SIZE]
 THRESHOLD=${1:-0.8}
 NUM_Q=${2:-1319}
 PORT=${3:-8020}
+DATA_PARALLEL_SIZE=${4:-2}
+TENSOR_PARALLEL_SIZE=${5:-2}
 OUT_DIR=${OUT_DIR:-/tmp/vllm-scheduled}
 mkdir -p "${OUT_DIR}"

@ -45,8 +47,10 @@ for BACK in "${BACKENDS[@]}"; do
  VLLM_ALL2ALL_BACKEND=$BACK \
  vllm serve "$MODEL" \
    --enforce-eager \
-    --tensor-parallel-size 2 \
-    --data-parallel-size 2 \
+    --enable-eplb \
+    --eplb-config '{"window_size":10, "step_interval":100, "num_redundant_experts":0, "log_balancedness":true}' \
+    --tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \
+    --data-parallel-size ${DATA_PARALLEL_SIZE} \
    --enable-expert-parallel \
    --trust-remote-code \
    --max-model-len 2048 \
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@ -1486,4 +1486,4 @@ steps:
  num_gpus: 4
  working_dir: "/vllm-workspace"
  commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh 0.8 200 8020
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@ -1340,11 +1340,20 @@ steps:
  commands:
  - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010

- label: Qwen3-30B-A3B-FP8-block Accuracy
+- label: Qwen3-30B-A3B-FP8-block Accuracy (H100)
  timeout_in_minutes: 60
  gpu: h100
  optional: true
  num_gpus: 4
  working_dir: "/vllm-workspace"
  commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh 0.8 200 8020
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
+
+- label: Qwen3-30B-A3B-FP8-block Accuracy (B200)
+  timeout_in_minutes: 60
+  gpu: b200
+  optional: true
+  num_gpus: 2
+  working_dir: "/vllm-workspace"
+  commands:
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1