more integration tests

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
NickLucche 2025-10-13 14:20:41 +00:00
parent b8d520232f
commit 1dc9df9842
3 changed files with 59 additions and 6 deletions

View File

@ -27,15 +27,21 @@ else
fi
# Models to run
MODELS=(
"Qwen/Qwen3-0.6B"
)
MODEL_NAMES=${MODEL_NAMES:-}
if [[ -n "$MODEL_NAMES" ]]; then
MODELS=("$MODEL_NAMES")
else
MODELS=(
"Qwen/Qwen3-0.6B"
)
fi
# Number of prefill and decode instances to create
NUM_PREFILL_INSTANCES=${NUM_PREFILL_INSTANCES:-1} # Default to 1
NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:-1} # Default to 1
PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1}
DECODER_TP_SIZE=${DECODER_TP_SIZE:-1}
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.2}
# Find the git repository root directory
GIT_ROOT=$(git rev-parse --show-toplevel)
@ -116,7 +122,7 @@ run_tests_for_model() {
vllm serve $model_name \
--port $PORT \
--enforce-eager \
--gpu-memory-utilization 0.2 \
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
--tensor-parallel-size $PREFILLER_TP_SIZE \
--kv-transfer-config '$KV_CONFIG'"
@ -151,7 +157,7 @@ run_tests_for_model() {
vllm serve $model_name \
--port $PORT \
--enforce-eager \
--gpu-memory-utilization 0.2 \
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
--tensor-parallel-size $DECODER_TP_SIZE \
--kv-transfer-config '$KV_CONFIG'"

View File

@ -12,7 +12,11 @@ FILTER = "exact_match,strict-match"
RTOL = 0.03
# Model-specific expected values
EXPECTED_VALUES = {"Qwen/Qwen3-0.6B": 0.41, "deepseek-ai/deepseek-vl2-small": 0.59}
EXPECTED_VALUES = {
"Qwen/Qwen3-0.6B": 0.41,
"deepseek-ai/deepseek-vl2-small": 0.59,
"deepseek-ai/DeepSeek-V2-Lite-Chat": 0.65,
}
SIMPLE_PROMPT = (
"The best part about working on vLLM is that I got to meet so many people across "

View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
# Utility to run integration tests sequentially with varying TP configurations.
# If FLASHINFER is set (non-empty), all tests are rerun with
# VLLM_ATTENTION_BACKEND=FLASHINFER.
set -euo pipefail

# Accuracy test runner invoked once per configuration.
readonly SCRIPT="tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh"

# Test configurations: each entry is a space-separated list of VAR=value
# assignments passed to the runner's environment via 'env'.
configs=(
  "PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2"
  "PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2"
  "PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1"
  "GPU_MEMORY_UTILIZATION=0.6 MODEL_NAMES=deepseek-ai/DeepSeek-V2-Lite-Chat" # MLA case
  # TP greater than num heads
)
#######################################
# Run the accuracy script once for every entry in 'configs'.
# Globals:   SCRIPT (read), configs (read)
# Arguments: $1 - label used in log messages
#            $2 - extra space-separated VAR=value assignments (may be empty)
# Outputs:   progress to stdout; failure diagnostics to stderr
# Returns:   0 if every configuration passes; exits 1 on the first failure
#######################################
run_tests() {
  local label=$1
  local extra_env=$2
  local cfg
  local -a env_args
  echo "=== Running tests (${label}) ==="
  for cfg in "${configs[@]}"; do
    echo "-> Running with ${cfg} ${extra_env:+and ${extra_env}}"
    # Split the VAR=value words without eval. Unlike the unquoted expansion
    # 'env ${extra_env} ${cfg}', 'read' never performs glob expansion.
    read -ra env_args <<< "${extra_env} ${cfg}"
    if ! env "${env_args[@]}" bash "${SCRIPT}"; then
      echo "❌ Test failed for config: ${cfg} ${extra_env:+(${extra_env})}" >&2
      exit 1
    fi
  done
  echo "✅ All ${label} tests passed!"
}
# Base pass: run every configuration with the default attention backend.
run_tests "default backend" ""

# Optional second pass: only when FLASHINFER is set to a non-empty value,
# repeat the whole suite with VLLM_ATTENTION_BACKEND=FLASHINFER.
case "${FLASHINFER:-}" in
  "")
    echo "FLASHINFER not set, skipping FLASHINFER runs."
    ;;
  *)
    echo "FLASHINFER is set, rerunning with VLLM_ATTENTION_BACKEND=FLASHINFER"
    run_tests "FLASHINFER backend" "VLLM_ATTENTION_BACKEND=FLASHINFER"
    ;;
esac