From a738dbb2a1238a5e4aafb92c629af3bce4213024 Mon Sep 17 00:00:00 2001
From: QiliangCui
Date: Mon, 23 Jun 2025 17:18:10 -0700
Subject: [PATCH] Update test case parameter to have the throughput above 8.0 (#19994)

Signed-off-by: Qiliang Cui
---
 .buildkite/scripts/tpu/config_v6e_1.env | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.buildkite/scripts/tpu/config_v6e_1.env b/.buildkite/scripts/tpu/config_v6e_1.env
index 441758647347..03ec116f698d 100644
--- a/.buildkite/scripts/tpu/config_v6e_1.env
+++ b/.buildkite/scripts/tpu/config_v6e_1.env
@@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu
 
 # vllm config
 MODEL=meta-llama/Llama-3.1-8B-Instruct
-MAX_NUM_SEQS=512
-MAX_NUM_BATCHED_TOKENS=512
+MAX_NUM_SEQS=256
+MAX_NUM_BATCHED_TOKENS=1024
 TENSOR_PARALLEL_SIZE=1
 MAX_MODEL_LEN=2048
 DOWNLOAD_DIR=/mnt/disks/persist