From 2ba60ec7febaa439dd40040018116f55834b4170 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Fri, 17 Oct 2025 16:13:31 +0200 Subject: [PATCH] [CI] Nixl integration tests (#27010) Signed-off-by: NickLucche --- .buildkite/test-pipeline.yaml | 11 +++++ .../nixl_integration/run_accuracy_test.sh | 20 ++++++---- .../nixl_integration/test_accuracy.py | 7 +++- .../nixl_integration/toy_proxy_server.py | 3 +- .../tp_config_sweep_accuracy_test.sh | 40 +++++++++++++++++++ 5 files changed, 72 insertions(+), 9 deletions(-) create mode 100755 tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 238b6ef98bf23..69e36f2804c4d 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -1084,6 +1084,17 @@ steps: - tests/weight_loading commands: - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt + +- label: NixlConnector P/D accuracy tests (Distributed) # 30min + timeout_in_minutes: 30 + working_dir: "/vllm-workspace/tests" + num_gpus: 4 + source_file_dependencies: + - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py + - tests/v1/kv_connector/nixl_integration/ + commands: + - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt + - bash v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh ##### multi gpus test ##### diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh index ed6154462bb2b..31d437837dacb 100755 --- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh @@ -34,15 +34,21 @@ else fi # Models to run -MODELS=( - "Qwen/Qwen3-0.6B" -) +MODEL_NAMES=${MODEL_NAMES:-} +if [[ -n "$MODEL_NAMES" ]]; then + MODELS=("$MODEL_NAMES") +else + MODELS=( + "Qwen/Qwen3-0.6B" + ) +fi # Number of prefill and 
decode instances to create NUM_PREFILL_INSTANCES=${NUM_PREFILL_INSTANCES:-1} # Default to 1 NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:-1} # Default to 1 PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1} DECODER_TP_SIZE=${DECODER_TP_SIZE:-1} +GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.2} # Find the git repository root directory GIT_ROOT=$(git rev-parse --show-toplevel) @@ -130,7 +136,7 @@ run_tests_for_model() { vllm serve $model_name \ --port $PORT \ --enforce-eager \ - --gpu-memory-utilization 0.2 \ + --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \ --tensor-parallel-size $PREFILLER_TP_SIZE \ --kv-transfer-config '$KV_CONFIG'" @@ -171,7 +177,7 @@ run_tests_for_model() { vllm serve $model_name \ --port $PORT \ --enforce-eager \ - --gpu-memory-utilization 0.2 \ + --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \ --tensor-parallel-size $DECODER_TP_SIZE \ --kv-transfer-config '$KV_CONFIG'" @@ -200,7 +206,7 @@ run_tests_for_model() { done # Build the command for the proxy server with all the hosts and ports - PROXY_CMD="python ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py --port 8192" + PROXY_CMD="python3 ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py --port 8192" # Add all prefill hosts and ports PROXY_CMD+=" --prefiller-hosts ${PREFILL_HOSTS[@]}" @@ -219,7 +225,7 @@ run_tests_for_model() { # Run lm eval for this model echo "Running tests for $model_name" - TEST_MODEL=$model_name python -m pytest -s -x ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_accuracy.py + TEST_MODEL=$model_name python3 -m pytest -s -x ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_accuracy.py # Clean up before running next model cleanup_instances diff --git a/tests/v1/kv_connector/nixl_integration/test_accuracy.py b/tests/v1/kv_connector/nixl_integration/test_accuracy.py index b301968e5bf84..a70f4caeb9370 100644 --- a/tests/v1/kv_connector/nixl_integration/test_accuracy.py +++ 
b/tests/v1/kv_connector/nixl_integration/test_accuracy.py @@ -12,7 +12,12 @@ FILTER = "exact_match,strict-match" RTOL = 0.03 # Model-specific expected values -EXPECTED_VALUES = {"Qwen/Qwen3-0.6B": 0.41, "deepseek-ai/deepseek-vl2-small": 0.59} +EXPECTED_VALUES = { + "Qwen/Qwen3-0.6B": 0.41, + "deepseek-ai/deepseek-vl2-small": 0.59, + "deepseek-ai/deepseek-vl2-tiny": 0.19, + "deepseek-ai/DeepSeek-V2-Lite-Chat": 0.65, +} SIMPLE_PROMPT = ( "The best part about working on vLLM is that I got to meet so many people across " diff --git a/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py b/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py index 37d70510fe256..5768fcdb57ceb 100644 --- a/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py +++ b/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py @@ -76,7 +76,8 @@ def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--port", type=int, default=8000) - parser.add_argument("--host", type=str, default="localhost") + # Always use 127.0.0.1 as localhost binds to IPv6 which is blocked on CI + parser.add_argument("--host", type=str, default="127.0.0.1") # For prefiller instances parser.add_argument( diff --git a/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh new file mode 100755 index 0000000000000..537764aafc13f --- /dev/null +++ b/tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Utility to run integration tests sequentially with varying TP configurations. 
+SCRIPT="$(dirname "${BASH_SOURCE[0]:-$0}")/run_accuracy_test.sh"  # sibling script, located relative to this file so the caller's cwd does not matter
+
+# Test configurations: each entry is a space-separated list of VAR=value pairs applied to one run of ${SCRIPT}.
+configs=(
+  "GPU_MEMORY_UTILIZATION=0.6 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2"
+  "GPU_MEMORY_UTILIZATION=0.6 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2"
+  "GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA case
+  "GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
+)
+
+# run_tests LABEL EXTRA_ENV -- run ${SCRIPT} once for every entry in configs, in order.
+# LABEL is used only in log output; EXTRA_ENV is a (possibly empty) string of additional VAR=value pairs. Exits the script with status 1 on the first failing config.
+run_tests() {
+  local label=$1
+  local extra_env=$2
+  echo "=== Running tests (${label}) ==="
+  for cfg in "${configs[@]}"; do
+    echo "-> Running with ${cfg} ${extra_env:+and ${extra_env}}"
+    # Deliberately unquoted: word splitting hands each VAR=value pair to 'env' as a separate argument, so no eval is needed
+    if ! env ${extra_env} ${cfg} bash "${SCRIPT}"; then
+      echo "❌ Test failed for config: ${cfg} ${extra_env:+(${extra_env})}"
+      exit 1
+    fi
+  done
+  echo "✅ All ${label} tests passed!"
+}
+
+run_tests "default backend" ""  # baseline sweep: no extra environment overrides
+
+# Repeat the sweep with VLLM_ATTENTION_BACKEND=FLASHINFER only when FLASHINFER is set to a non-empty value
+if [[ -n "${FLASHINFER:-}" ]]; then
+  echo "FLASHINFER is set, rerunning with VLLM_ATTENTION_BACKEND=FLASHINFER"
+  run_tests "FLASHINFER backend" "VLLM_ATTENTION_BACKEND=FLASHINFER"
+else
+  echo "FLASHINFER not set, skipping FLASHINFER runs."
+fi