From 4f76a05f4f6c70a821e4a092d69331c993731614 Mon Sep 17 00:00:00 2001 From: ericehanley Date: Tue, 22 Jul 2025 22:33:00 -0500 Subject: [PATCH] [BugFix] Update python to python3 calls for image; fix prefix & input calculations. (#21391) Signed-off-by: Eric Hanley Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- benchmarks/auto_tune/auto_tune.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/benchmarks/auto_tune/auto_tune.sh b/benchmarks/auto_tune/auto_tune.sh index 159ee14214756..eaa28ea5c92b9 100644 --- a/benchmarks/auto_tune/auto_tune.sh +++ b/benchmarks/auto_tune/auto_tune.sh @@ -126,11 +126,12 @@ run_benchmark() { # get a basic qps by using request-rate inf bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_inf.txt" prefix_len=$(( INPUT_LEN * MIN_CACHE_HIT_PCT / 100 )) - python benchmarks/benchmark_serving.py \ +adjusted_input_len=$(( INPUT_LEN - prefix_len )) + python3 benchmarks/benchmark_serving.py \ --backend vllm \ --model $MODEL \ --dataset-name random \ - --random-input-len $INPUT_LEN \ + --random-input-len $adjusted_input_len \ --random-output-len $OUTPUT_LEN \ --ignore-eos \ --disable-tqdm \ @@ -159,11 +160,11 @@ run_benchmark() { curl -X POST http://0.0.0.0:8004/reset_prefix_cache sleep 5 bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_${request_rate}.txt" - python benchmarks/benchmark_serving.py \ + python3 benchmarks/benchmark_serving.py \ --backend vllm \ --model $MODEL \ --dataset-name random \ - --random-input-len $INPUT_LEN \ + --random-input-len $adjusted_input_len \ --random-output-len $OUTPUT_LEN \ --ignore-eos \ --disable-tqdm \