[CI] Nixl integration tests DP-EP (#27199)
Signed-off-by: NickLucche <nlucches@redhat.com>
Parent: 265ecb05fb
Commit: bfa59be8f1
@@ -178,8 +178,16 @@ run_tests_for_model() {
         --port $PORT \
         --enforce-eager \
         --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
-        --tensor-parallel-size $DECODER_TP_SIZE \
         --kv-transfer-config '$KV_CONFIG'"
 
+    # DP-EP attention mode
+    if [[ -z "$DP_EP" ]]; then
+      BASE_CMD="${BASE_CMD} --tensor-parallel-size $DECODER_TP_SIZE"
+    else
+      echo "DP-EP Attention enabled, deploying with dp=$DECODER_TP_SIZE and tp=1"
+      BASE_CMD="${BASE_CMD} --data-parallel-size $DECODER_TP_SIZE \
+        --tensor-parallel-size 1 --enable-expert-parallel"
+    fi
+
     if [ -n "$model_args" ]; then
       FULL_CMD="$BASE_CMD $model_args"
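For orientation, a minimal sketch of the two decoder launch shapes this conditional produces. It assumes BASE_CMD wraps a `vllm serve <model>` invocation and that $MODEL, $PORT, and the other variables are set by the surrounding script; neither the command prefix nor those values appear in this hunk.

    # Sketch only; flags are the ones visible in the diff above.

    # DP_EP unset: decoder sharded with tensor parallelism.
    vllm serve "$MODEL" \
      --port "$PORT" \
      --enforce-eager \
      --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
      --kv-transfer-config "$KV_CONFIG" \
      --tensor-parallel-size "$DECODER_TP_SIZE"

    # DP_EP set: the same GPU count is spent on data parallelism plus expert
    # parallelism, with tensor parallelism fixed at 1.
    vllm serve "$MODEL" \
      --port "$PORT" \
      --enforce-eager \
      --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
      --kv-transfer-config "$KV_CONFIG" \
      --data-parallel-size "$DECODER_TP_SIZE" \
      --tensor-parallel-size 1 \
      --enable-expert-parallel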
@@ -10,6 +10,7 @@ configs=(
   "GPU_MEMORY_UTILIZATION=0.6 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2"
   "GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA case
   "GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
+  "DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP1, D-DPEP=2 (TP=1)
 )
 
 run_tests() {
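A hypothetical sketch of how one of these config strings could be consumed per run; the actual loop lives outside this hunk and may differ.

    # Hypothetical harness loop (not part of this diff): each configs entry is
    # a list of VAR=VALUE assignments applied before the test body runs, so the
    # new DP_EP=1 entry steers the script into the DP-EP branch shown above.
    for config in "${configs[@]}"; do
      export $config   # word splitting yields one NAME=VALUE per export argument
      run_tests        # DP_EP non-empty -> data-parallel + expert-parallel decoder
      unset DP_EP      # keep the flag from leaking into the next configuration
    done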