mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-10 08:49:06 +08:00
[Build/CI][DP/EP] Add Qwen/Qwen3-30B-A3B-FP8 + EPLB tests to Nightly H100 and B200 (#29195)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
parent
656516c315
commit
e924bbb4f4
@ -1,10 +1,12 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -euxo pipefail
|
set -euxo pipefail
|
||||||
|
|
||||||
# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
|
# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT] [DATA_PARALLEL_SIZE] [TENSOR_PARALLEL_SIZE]
|
||||||
THRESHOLD=${1:-0.8}
|
THRESHOLD=${1:-0.8}
|
||||||
NUM_Q=${2:-1319}
|
NUM_Q=${2:-1319}
|
||||||
PORT=${3:-8020}
|
PORT=${3:-8020}
|
||||||
|
DATA_PARALLEL_SIZE=${4:-2}
|
||||||
|
TENSOR_PARALLEL_SIZE=${5:-2}
|
||||||
OUT_DIR=${OUT_DIR:-/tmp/vllm-scheduled}
|
OUT_DIR=${OUT_DIR:-/tmp/vllm-scheduled}
|
||||||
mkdir -p "${OUT_DIR}"
|
mkdir -p "${OUT_DIR}"
|
||||||
|
|
||||||
@ -45,8 +47,10 @@ for BACK in "${BACKENDS[@]}"; do
|
|||||||
VLLM_ALL2ALL_BACKEND=$BACK \
|
VLLM_ALL2ALL_BACKEND=$BACK \
|
||||||
vllm serve "$MODEL" \
|
vllm serve "$MODEL" \
|
||||||
--enforce-eager \
|
--enforce-eager \
|
||||||
--tensor-parallel-size 2 \
|
--enable-eplb \
|
||||||
--data-parallel-size 2 \
|
--eplb-config '{"window_size":10, "step_interval":100, "num_redundant_experts":0, "log_balancedness":true}' \
|
||||||
|
--tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \
|
||||||
|
--data-parallel-size ${DATA_PARALLEL_SIZE} \
|
||||||
--enable-expert-parallel \
|
--enable-expert-parallel \
|
||||||
--trust-remote-code \
|
--trust-remote-code \
|
||||||
--max-model-len 2048 \
|
--max-model-len 2048 \
|
||||||
@ -1486,4 +1486,4 @@ steps:
|
|||||||
num_gpus: 4
|
num_gpus: 4
|
||||||
working_dir: "/vllm-workspace"
|
working_dir: "/vllm-workspace"
|
||||||
commands:
|
commands:
|
||||||
- bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh 0.8 200 8020
|
- bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
|
||||||
|
|||||||
@ -1340,11 +1340,20 @@ steps:
|
|||||||
commands:
|
commands:
|
||||||
- bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
|
- bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
|
||||||
|
|
||||||
- label: Qwen3-30B-A3B-FP8-block Accuracy
|
- label: Qwen3-30B-A3B-FP8-block Accuracy (H100)
|
||||||
timeout_in_minutes: 60
|
timeout_in_minutes: 60
|
||||||
gpu: h100
|
gpu: h100
|
||||||
optional: true
|
optional: true
|
||||||
num_gpus: 4
|
num_gpus: 4
|
||||||
working_dir: "/vllm-workspace"
|
working_dir: "/vllm-workspace"
|
||||||
commands:
|
commands:
|
||||||
- bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh 0.8 200 8020
|
- bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
|
||||||
|
|
||||||
|
- label: Qwen3-30B-A3B-FP8-block Accuracy (B200)
|
||||||
|
timeout_in_minutes: 60
|
||||||
|
gpu: b200
|
||||||
|
optional: true
|
||||||
|
num_gpus: 2
|
||||||
|
working_dir: "/vllm-workspace"
|
||||||
|
commands:
|
||||||
|
- bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
|
||||||
Loading…
x
Reference in New Issue
Block a user