diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
index 31d437837dacb..a9817313cf022 100755
--- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
@@ -178,8 +178,18 @@ run_tests_for_model() {
     --port $PORT \
     --enforce-eager \
     --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
-    --tensor-parallel-size $DECODER_TP_SIZE \
     --kv-transfer-config '$KV_CONFIG'"
+
+    # DP-EP attention mode: when DP_EP is non-empty, append
+    # --data-parallel-size $DECODER_TP_SIZE, --tensor-parallel-size 1 and
+    # --enable-expert-parallel; otherwise append --tensor-parallel-size only.
+    if [[ -z "${DP_EP:-}" ]]; then
+      BASE_CMD="${BASE_CMD} --tensor-parallel-size $DECODER_TP_SIZE"
+    else
+      echo "DP-EP Attention enabled, deploying with dp=$DECODER_TP_SIZE and tp=1"
+      BASE_CMD="${BASE_CMD} --data-parallel-size $DECODER_TP_SIZE \
+      --tensor-parallel-size 1 --enable-expert-parallel"
+    fi
 
     if [ -n "$model_args" ]; then
     FULL_CMD="$BASE_CMD $model_args"
diff --git a/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh
index 537764aafc13f..9308c81da0635 100755
--- a/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh
@@ -10,6 +10,7 @@ configs=(
   "GPU_MEMORY_UTILIZATION=0.6 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2"
   "GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA case
   "GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
+  "DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP1, D-DPEP=2 (TP=1)
 )
 
 run_tests() {