#!/usr/bin/env bash
# Sweep vllm's benchmark_serving.py over a set of request rates (QPS),
# saving one results file per rate. Expects the vllm repo at ./vllm and
# a serving endpoint already running for the target model.
set -euo pipefail

MODEL=meta-llama/Llama-3.1-8B-Instruct
REQUEST_RATES=(1 10 15 20)
INPUT_LEN=1000      # random-dataset prompt length (tokens)
OUTPUT_LEN=100      # forced generation length (paired with --ignore-eos)
TOTAL_SECONDS=120   # target wall-clock duration of each sweep point
# Tag embedded in result filenames; override via env: FRAMEWORK=sglang ./sweep.sh
# (The original referenced $FRAMEWORK without ever defining it, yielding
# a filename of "-results.json".)
FRAMEWORK=${FRAMEWORK:-vllm}

for REQUEST_RATE in "${REQUEST_RATES[@]}"; do
  # Size the prompt count so each run lasts ~TOTAL_SECONDS at this QPS.
  NUM_PROMPTS=$(( TOTAL_SECONDS * REQUEST_RATE ))

  echo ""
  echo "===== RUNNING $MODEL FOR $NUM_PROMPTS PROMPTS WITH $REQUEST_RATE QPS ====="
  echo ""

  # Seed with the request rate: reproducible per sweep point, distinct across
  # points. The filename embeds the rate so iterations do not overwrite each
  # other (the original wrote every rate to the same file).
  python3 vllm/benchmarks/benchmark_serving.py \
    --model "$MODEL" \
    --dataset-name random \
    --random-input-len "$INPUT_LEN" \
    --random-output-len "$OUTPUT_LEN" \
    --request-rate "$REQUEST_RATE" \
    --num-prompts "$NUM_PROMPTS" \
    --seed "$REQUEST_RATE" \
    --ignore-eos \
    --result-filename "${FRAMEWORK}-qps${REQUEST_RATE}-results.json" \
    --save-result
done