From 23b3134eb5ee45f2d2e97d89110f8a52e3f421a1 Mon Sep 17 00:00:00 2001
From: Russell Bryant
Date: Tue, 13 May 2025 04:47:29 -0400
Subject: [PATCH] [Benchmarks] Refactor run_structured_output_benchmarks.sh (#17722)

Signed-off-by: Russell Bryant
---
Review notes (this section is ignored by `git am`):

What the patch does: replaces the five positional parameters of the script
with environment-variable defaults plus long command line options, adds a
usage/help text, lowers the QPS sweep to (25 20 15 10 5 1), and derives
--num-prompts for every run from TOTAL_SECONDS * qps.

Changes made during review, relative to the patch as originally posted:
 - An option given without a value (e.g. a trailing `--model`) now aborts
   with a message. Before, `shift 2` failed without shifting, so
   `while [[ $# -gt 0 ]]` never terminated.
 - An unknown argument is reported on stderr and exits with status 1.
   Before, `echo "...\n"` printed a literal backslash-n and usage() always
   exited 0. `-h/--help` still prints to stdout and exits 0.
 - COMMON_PARAMS is an array and the per-run options are quoted, so values
   containing whitespace (OUTPUT_DIR defaults to a path below SCRIPT_DIR)
   are passed as single arguments.
 - NUM_PROMPTS is computed with awk instead of bc: no dependency on bc, and
   no empty value when the product is below 1 (bc prints e.g. ".50").

NOTE(review): line breaks and indentation were reconstructed from a copy in
which all whitespace had been collapsed; check the context lines against
the tree before applying. The commit id on the "From" line and the blob ids
on the "index" line are carried over unchanged and no longer describe this
revision. COMMON_PARAMS is assumed to be referenced only by the python
invocation shown in the second hunk - confirm against the full file.

 benchmarks/run_structured_output_benchmark.sh | 121 +++++++++++++++---
 1 file changed, 99 insertions(+), 22 deletions(-)

diff --git a/benchmarks/run_structured_output_benchmark.sh b/benchmarks/run_structured_output_benchmark.sh
index 53dc7ed70b9cf..b043ab83e4608 100755
--- a/benchmarks/run_structured_output_benchmark.sh
+++ b/benchmarks/run_structured_output_benchmark.sh
@@ -1,32 +1,105 @@
 #!/bin/bash
 
-# Define the model to use
-MODEL=${1:-"Qwen/Qwen2.5-7B-Instruct"}
-
-# Define the backend to use
-BACKEND=${2:-"vllm"}
-
-# Define the dataset to use
-DATASET=${3:-"xgrammar_bench"}
-
+# default values
+MODEL=${MODEL:-"Qwen/Qwen2.5-7B-Instruct"}
+BACKEND=${BACKEND:-"vllm"}
+DATASET=${DATASET:-"xgrammar_bench"}
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-OUTPUT_DIR=${4:-"$SCRIPT_DIR/structured_output_benchmark_results"}
+OUTPUT_DIR=${OUTPUT_DIR:-"$SCRIPT_DIR/structured_output_benchmark_results"}
+PORT=${PORT:-8000}
+STRUCTURED_OUTPUT_RATIO=${STRUCTURED_OUTPUT_RATIO:-1}
+TOTAL_SECONDS=${TOTAL_SECONDS:-90}
+MAX_NEW_TOKENS=${MAX_NEW_TOKENS:-300}
+TOKENIZER_MODE=${TOKENIZER_MODE:-"auto"}
 
-GUIDED_RATIO=${5:-0.5}
+# Print the help text and exit with status $1 (default: 0).
+usage() {
+  echo "Usage: $0 [options]"
+  echo "Options:"
+  echo "  --model MODEL                Model to benchmark (default: $MODEL)"
+  echo "  --backend BACKEND            Backend to use (default: $BACKEND)"
+  echo "  --dataset DATASET            Dataset to use (default: $DATASET)"
+  echo "  --max-new-tokens N           Maximum number of tokens to generate (default: $MAX_NEW_TOKENS)"
+  echo "  --output-dir DIR             Output directory for results (default: $OUTPUT_DIR)"
+  echo "  --port PORT                  Port to use (default: $PORT)"
+  echo "  --structured-output-ratio N  Ratio of structured outputs (default: $STRUCTURED_OUTPUT_RATIO)"
+  echo "  --tokenizer-mode MODE        Tokenizer mode to use (default: $TOKENIZER_MODE)"
+  echo "  --total-seconds N            Total seconds to run the benchmark (default: $TOTAL_SECONDS)"
+  echo "  -h, --help                   Show this help message and exit"
+  exit "${1:-0}"
+}
+
+# parse command line arguments
+# ${2:?...} aborts with a message when an option has no value; a bare "$2"
+# followed by a failing `shift 2` would leave $# unchanged and spin this
+# loop forever.
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --model)
+      MODEL="${2:?--model requires a value}"
+      shift 2
+      ;;
+    --backend)
+      BACKEND="${2:?--backend requires a value}"
+      shift 2
+      ;;
+    --dataset)
+      DATASET="${2:?--dataset requires a value}"
+      shift 2
+      ;;
+    --max-new-tokens)
+      MAX_NEW_TOKENS="${2:?--max-new-tokens requires a value}"
+      shift 2
+      ;;
+    --output-dir)
+      OUTPUT_DIR="${2:?--output-dir requires a value}"
+      shift 2
+      ;;
+    --port)
+      PORT="${2:?--port requires a value}"
+      shift 2
+      ;;
+    --structured-output-ratio)
+      STRUCTURED_OUTPUT_RATIO="${2:?--structured-output-ratio requires a value}"
+      shift 2
+      ;;
+    --tokenizer-mode)
+      TOKENIZER_MODE="${2:?--tokenizer-mode requires a value}"
+      shift 2
+      ;;
+    --total-seconds)
+      TOTAL_SECONDS="${2:?--total-seconds requires a value}"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      ;;
+    *)
+      printf 'Unknown argument: %s\n\n' "$1" >&2
+      usage 1 >&2
+      ;;
+  esac
+done
 
 # Create output directory if it doesn't exist
 mkdir -p "$OUTPUT_DIR"
 
 # Define QPS values to test
-QPS_VALUES=(70 60 50 25 20 15 10)
+QPS_VALUES=(25 20 15 10 5 1)
 
 # Common parameters
-COMMON_PARAMS="--backend $BACKEND \
-               --model $MODEL \
-               --dataset $DATASET \
-               --structured-output-ratio $GUIDED_RATIO \
-               --save-results \
-               --result-dir $OUTPUT_DIR"
+# Kept as an array so that values containing spaces survive expansion.
+COMMON_PARAMS=(
+  --backend "$BACKEND"
+  --model "$MODEL"
+  --dataset "$DATASET"
+  --structured-output-ratio "$STRUCTURED_OUTPUT_RATIO"
+  --save-results
+  --result-dir "$OUTPUT_DIR"
+  --output-len "$MAX_NEW_TOKENS"
+  --port "$PORT"
+  --tokenizer-mode "$TOKENIZER_MODE"
+)
 
 echo "Starting structured output benchmark with model: $MODEL"
 echo "Backend: $BACKEND"
@@ -45,12 +118,16 @@ for qps in "${QPS_VALUES[@]}"; do
   # Construct filename for this run
   FILENAME="${BACKEND}_${qps}qps_$(basename $MODEL)_${DATASET}_${GIT_HASH}.json"
 
+  # awk instead of bc: no extra dependency, and printf "%d" drops the
+  # fractional part without producing an empty string for results below 1
+  NUM_PROMPTS=$(awk -v secs="$TOTAL_SECONDS" -v qps="$qps" 'BEGIN { printf "%d", secs * qps }')
+  echo "Running benchmark with $NUM_PROMPTS prompts"
+
   # Run the benchmark
-  python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
-    --request-rate $qps \
+  python "$SCRIPT_DIR/benchmark_serving_structured_output.py" "${COMMON_PARAMS[@]}" \
+    --request-rate "$qps" \
     --result-filename "$FILENAME" \
-    --tokenizer-mode ${TOKENIZER_MODE:-"auto"} \
-    --port ${PORT:-8000}
+    --num-prompts "$NUM_PROMPTS"
 
   echo "Completed benchmark with QPS: $qps"
   echo "----------------------------------------"