more integration tests

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
NickLucche 2025-10-13 14:20:41 +00:00
parent b8d520232f
commit 1dc9df9842
3 changed files with 59 additions and 6 deletions

View File

@ -27,15 +27,21 @@ else
fi
# Models to run
MODELS=(
"Qwen/Qwen3-0.6B"
)
MODEL_NAMES=${MODEL_NAMES:-}
if [[ -n "$MODEL_NAMES" ]]; then
MODELS=("$MODEL_NAMES")
else
MODELS=(
"Qwen/Qwen3-0.6B"
)
fi
# Number of prefill and decode instances to create
NUM_PREFILL_INSTANCES=${NUM_PREFILL_INSTANCES:-1} # Default to 1
NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:-1} # Default to 1
PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1}
DECODER_TP_SIZE=${DECODER_TP_SIZE:-1}
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.2}
# Find the git repository root directory
GIT_ROOT=$(git rev-parse --show-toplevel)
@ -116,7 +122,7 @@ run_tests_for_model() {
vllm serve $model_name \
--port $PORT \
--enforce-eager \
--gpu-memory-utilization 0.2 \
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
--tensor-parallel-size $PREFILLER_TP_SIZE \
--kv-transfer-config '$KV_CONFIG'"
@ -151,7 +157,7 @@ run_tests_for_model() {
vllm serve $model_name \
--port $PORT \
--enforce-eager \
--gpu-memory-utilization 0.2 \
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
--tensor-parallel-size $DECODER_TP_SIZE \
--kv-transfer-config '$KV_CONFIG'"

View File

@ -12,7 +12,11 @@ FILTER = "exact_match,strict-match"
RTOL = 0.03
# Model-specific expected values
EXPECTED_VALUES = {"Qwen/Qwen3-0.6B": 0.41, "deepseek-ai/deepseek-vl2-small": 0.59}
EXPECTED_VALUES = {
"Qwen/Qwen3-0.6B": 0.41,
"deepseek-ai/deepseek-vl2-small": 0.59,
"deepseek-ai/DeepSeek-V2-Lite-Chat": 0.65,
}
SIMPLE_PROMPT = (
"The best part about working on vLLM is that I got to meet so many people across "

View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
# Utility to run integration tests sequentially with varying TP configurations.
# If FLASHINFER is set (non-empty), all tests are rerun with
# VLLM_ATTENTION_BACKEND=FLASHINFER.
set -euo pipefail

# Accuracy test runner invoked once per configuration.
readonly SCRIPT="tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh"

# Test configurations: each entry is a space-separated list of VAR=value
# assignments passed to the runner's environment via 'env'.
configs=(
  "PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2"
  "PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2"
  "PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1"
  "GPU_MEMORY_UTILIZATION=0.6 MODEL_NAMES=deepseek-ai/DeepSeek-V2-Lite-Chat" # MLA case
  # TP greater than num heads
)
#######################################
# Run the accuracy script once for every entry in 'configs'.
# Globals:   SCRIPT (read), configs (read)
# Arguments: $1 - label used in log messages
#            $2 - extra space-separated VAR=value assignments (may be empty)
# Outputs:   progress to stdout; failure diagnostics to stderr
# Returns:   0 if every configuration passes; exits 1 on the first failure
#######################################
run_tests() {
  local label=$1
  local extra_env=$2
  local cfg
  local -a env_args
  echo "=== Running tests (${label}) ==="
  for cfg in "${configs[@]}"; do
    echo "-> Running with ${cfg} ${extra_env:+and ${extra_env}}"
    # Split the VAR=value words without eval. Unlike the unquoted expansion
    # 'env ${extra_env} ${cfg}', 'read' never performs glob expansion.
    read -ra env_args <<< "${extra_env} ${cfg}"
    if ! env "${env_args[@]}" bash "${SCRIPT}"; then
      echo "❌ Test failed for config: ${cfg} ${extra_env:+(${extra_env})}" >&2
      exit 1
    fi
  done
  echo "✅ All ${label} tests passed!"
}
# Base pass: run every configuration with the default attention backend.
run_tests "default backend" ""

# Optional second pass: only when FLASHINFER is set to a non-empty value,
# repeat the whole suite with VLLM_ATTENTION_BACKEND=FLASHINFER.
case "${FLASHINFER:-}" in
  "")
    echo "FLASHINFER not set, skipping FLASHINFER runs."
    ;;
  *)
    echo "FLASHINFER is set, rerunning with VLLM_ATTENTION_BACKEND=FLASHINFER"
    run_tests "FLASHINFER backend" "VLLM_ATTENTION_BACKEND=FLASHINFER"
    ;;
esac