From 23b3134eb5ee45f2d2e97d89110f8a52e3f421a1 Mon Sep 17 00:00:00 2001
From: Russell Bryant <rbryant@redhat.com>
Date: Tue, 13 May 2025 04:47:29 -0400
Subject: [PATCH] [Benchmarks] Refactor run_structured_output_benchmarks.sh
 (#17722)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
---
 benchmarks/run_structured_output_benchmark.sh | 101 +++++++++++++++---
 1 file changed, 85 insertions(+), 16 deletions(-)

diff --git a/benchmarks/run_structured_output_benchmark.sh b/benchmarks/run_structured_output_benchmark.sh
index 53dc7ed70b9cf..b043ab83e4608 100755
--- a/benchmarks/run_structured_output_benchmark.sh
+++ b/benchmarks/run_structured_output_benchmark.sh
@@ -1,32 +1,98 @@
 #!/bin/bash
 
-# Define the model to use
-MODEL=${1:-"Qwen/Qwen2.5-7B-Instruct"}
-
-# Define the backend to use
-BACKEND=${2:-"vllm"}
-
-# Define the dataset to use
-DATASET=${3:-"xgrammar_bench"}
-
+# Defaults; override via the same-named environment variable or the flags parsed below.
+MODEL=${MODEL:-"Qwen/Qwen2.5-7B-Instruct"}
+BACKEND=${BACKEND:-"vllm"}
+DATASET=${DATASET:-"xgrammar_bench"}
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-OUTPUT_DIR=${4:-"$SCRIPT_DIR/structured_output_benchmark_results"}
+OUTPUT_DIR=${OUTPUT_DIR:-"$SCRIPT_DIR/structured_output_benchmark_results"}
+PORT=${PORT:-8000}  # forwarded to the benchmark script as --port
+STRUCTURED_OUTPUT_RATIO=${STRUCTURED_OUTPUT_RATIO:-1}  # forwarded as --structured-output-ratio
+TOTAL_SECONDS=${TOTAL_SECONDS:-90}  # multiplied by each QPS value to get --num-prompts
+MAX_NEW_TOKENS=${MAX_NEW_TOKENS:-300}  # forwarded as --output-len
+TOKENIZER_MODE=${TOKENIZER_MODE:-"auto"}  # forwarded as --tokenizer-mode
 
-GUIDED_RATIO=${5:-0.5}
+# Print every supported option with its currently effective default, then exit 0.
+usage() {
+    printf '%s\n' "Usage: $0 [options]" "Options:"
+    printf '%s\n' "  --model MODEL                  Model to benchmark (default: $MODEL)"
+    printf '%s\n' "  --backend BACKEND              Backend to use (default: $BACKEND)"
+    printf '%s\n' "  --dataset DATASET              Dataset to use (default: $DATASET)"
+    printf '%s\n' "  --max-new-tokens N             Maximum number of tokens to generate (default: $MAX_NEW_TOKENS)"
+    printf '%s\n' "  --output-dir DIR               Output directory for results (default: $OUTPUT_DIR)"
+    printf '%s\n' "  --port PORT                    Port to use (default: $PORT)"
+    printf '%s\n' "  --structured-output-ratio N    Ratio of structured outputs (default: $STRUCTURED_OUTPUT_RATIO)"
+    printf '%s\n' "  --tokenizer-mode MODE          Tokenizer mode to use (default: $TOKENIZER_MODE)"
+    printf '%s\n' "  --total-seconds N              Total seconds to run the benchmark (default: $TOTAL_SECONDS)"
+    printf '%s\n' "  -h, --help                     Show this help message and exit"
+    exit 0
+}
+
+# Parse command line arguments. `${2:?...}` aborts when a flag has no value;
+# a bare `shift 2` would fail without shifting and the loop would never end.
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --model)
+      MODEL="${2:?--model requires a value}"
+      shift 2
+      ;;
+    --backend)
+      BACKEND="${2:?--backend requires a value}"
+      shift 2
+      ;;
+    --dataset)
+      DATASET="${2:?--dataset requires a value}"
+      shift 2
+      ;;
+    --max-new-tokens)
+      MAX_NEW_TOKENS="${2:?--max-new-tokens requires a value}"
+      shift 2
+      ;;
+    --output-dir)
+      OUTPUT_DIR="${2:?--output-dir requires a value}"
+      shift 2
+      ;;
+    --port)
+      PORT="${2:?--port requires a value}"
+      shift 2
+      ;;
+    --structured-output-ratio)
+      STRUCTURED_OUTPUT_RATIO="${2:?--structured-output-ratio requires a value}"
+      shift 2
+      ;;
+    --tokenizer-mode)
+      TOKENIZER_MODE="${2:?--tokenizer-mode requires a value}"
+      shift 2
+      ;;
+    --total-seconds)
+      TOTAL_SECONDS="${2:?--total-seconds requires a value}"
+      shift 2
+      ;;
+    -h|--help) usage ;;
+    *)
+      printf 'Unknown argument: %s\n\n' "$1" >&2
+      (usage) >&2  # subshell: usage ends in `exit 0`, which must not end the script
+      exit 1
+      ;;
+  esac
+done
 
 # Create output directory if it doesn't exist
 mkdir -p "$OUTPUT_DIR"
 
 # Define QPS values to test
-QPS_VALUES=(70 60 50 25 20 15 10)
+QPS_VALUES=(25 20 15 10 5 1)
 
 # Common parameters
 COMMON_PARAMS="--backend $BACKEND \
                --model $MODEL \
                --dataset $DATASET \
-               --structured-output-ratio $GUIDED_RATIO \
+               --structured-output-ratio $STRUCTURED_OUTPUT_RATIO \
                --save-results \
-               --result-dir $OUTPUT_DIR"
+               --result-dir $OUTPUT_DIR \
+               --output-len $MAX_NEW_TOKENS \
+               --port $PORT \
+               --tokenizer-mode $TOKENIZER_MODE"
 
 echo "Starting structured output benchmark with model: $MODEL"
 echo "Backend: $BACKEND"
@@ -45,12 +111,15 @@ for qps in "${QPS_VALUES[@]}"; do
   # Construct filename for this run
   FILENAME="${BACKEND}_${qps}qps_$(basename $MODEL)_${DATASET}_${GIT_HASH}.json"
 
+  NUM_PROMPTS=$(echo "$TOTAL_SECONDS * $qps" | bc)  # prompts = duration (s) x request rate
+  NUM_PROMPTS=${NUM_PROMPTS%.*}  # Drop any fractional part printed by bc
+  echo "Running benchmark with $NUM_PROMPTS prompts"
+
   # Run the benchmark
   python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
     --request-rate $qps \
     --result-filename "$FILENAME" \
-    --tokenizer-mode ${TOKENIZER_MODE:-"auto"} \
-    --port ${PORT:-8000}
+    --num-prompts $NUM_PROMPTS
 
   echo "Completed benchmark with QPS: $qps"
   echo "----------------------------------------"