diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh index 3b0f2d102c1ff..78941e5edd4e3 100755 --- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh @@ -27,15 +27,21 @@ else fi # Models to run -MODELS=( - "Qwen/Qwen3-0.6B" -) +MODEL_NAMES=${MODEL_NAMES:-} +if [[ -n "$MODEL_NAMES" ]]; then + MODELS=("$MODEL_NAMES") +else + MODELS=( + "Qwen/Qwen3-0.6B" + ) +fi # Number of prefill and decode instances to create NUM_PREFILL_INSTANCES=${NUM_PREFILL_INSTANCES:-1} # Default to 1 NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:-1} # Default to 1 PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1} DECODER_TP_SIZE=${DECODER_TP_SIZE:-1} +GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.2} # Find the git repository root directory GIT_ROOT=$(git rev-parse --show-toplevel) @@ -116,7 +122,7 @@ run_tests_for_model() { vllm serve $model_name \ --port $PORT \ --enforce-eager \ - --gpu-memory-utilization 0.2 \ + --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \ --tensor-parallel-size $PREFILLER_TP_SIZE \ --kv-transfer-config '$KV_CONFIG'" @@ -151,7 +157,7 @@ run_tests_for_model() { vllm serve $model_name \ --port $PORT \ --enforce-eager \ - --gpu-memory-utilization 0.2 \ + --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \ --tensor-parallel-size $DECODER_TP_SIZE \ --kv-transfer-config '$KV_CONFIG'" diff --git a/tests/v1/kv_connector/nixl_integration/test_accuracy.py b/tests/v1/kv_connector/nixl_integration/test_accuracy.py index b301968e5bf84..8f4fbe6ff7a26 100644 --- a/tests/v1/kv_connector/nixl_integration/test_accuracy.py +++ b/tests/v1/kv_connector/nixl_integration/test_accuracy.py @@ -12,7 +12,11 @@ FILTER = "exact_match,strict-match" RTOL = 0.03 # Model-specific expected values -EXPECTED_VALUES = {"Qwen/Qwen3-0.6B": 0.41, "deepseek-ai/deepseek-vl2-small": 0.59} +EXPECTED_VALUES = { + "Qwen/Qwen3-0.6B": 0.41, + "deepseek-ai/deepseek-vl2-small": 0.59, + "deepseek-ai/DeepSeek-V2-Lite-Chat": 0.65, +} SIMPLE_PROMPT = ( "The best part about working on vLLM is that I got to meet so many people across " diff --git a/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh new file mode 100755 index 0000000000000..d82c79081d7c8 --- /dev/null +++ b/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Utility to run integration tests sequentially with varying TP configurations. +# If FLASHINFER is set, reruns all tests with VLLM_ATTENTION_BACKEND=FLASHINFER. + +SCRIPT="tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh" + +# Define test configurations +configs=( + "PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2" + "PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2" + "PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1" + "GPU_MEMORY_UTILIZATION=0.6 MODEL_NAMES=deepseek-ai/DeepSeek-V2-Lite-Chat" # MLA case + # TP greater than num heads +) + +run_tests() { + local label=$1 + local extra_env=$2 + + echo "=== Running tests (${label}) ===" + for cfg in "${configs[@]}"; do + echo "-> Running with ${cfg} ${extra_env:+and ${extra_env}}" + # Use 'env' to safely set variables without eval + if ! env ${extra_env} ${cfg} bash "${SCRIPT}"; then + echo "❌ Test failed for config: ${cfg} ${extra_env:+(${extra_env})}" + exit 1 + fi + done + echo "✅ All ${label} tests passed!" +} + +# Run base tests +run_tests "default backend" "" + +# Check if FLASHINFER is set (non-empty) +if [[ -n "${FLASHINFER:-}" ]]; then + echo "FLASHINFER is set, rerunning with VLLM_ATTENTION_BACKEND=FLASHINFER" + run_tests "FLASHINFER backend" "VLLM_ATTENTION_BACKEND=FLASHINFER" +else + echo "FLASHINFER not set, skipping FLASHINFER runs." +fi