Update test case parameter to have the throughput above 8.0 (#19994)

Signed-off-by: Qiliang Cui <derrhein@gmail.com>
2025-12-10 06:35:00 +08:00 · 2025-06-23 17:18:10 -07:00 · 2025-06-23 17:18:10 -07:00 · a738dbb2a1
commit a738dbb2a1
parent 33d5e29be9
1 changed files with 2 additions and 2 deletions
--- a/.buildkite/scripts/tpu/config_v6e_1.env
+++ b/.buildkite/scripts/tpu/config_v6e_1.env
@@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu
 # vllm config
 MODEL=meta-llama/Llama-3.1-8B-Instruct
-MAX_NUM_SEQS=512
+MAX_NUM_SEQS=256
-MAX_NUM_BATCHED_TOKENS=512
+MAX_NUM_BATCHED_TOKENS=1024
 TENSOR_PARALLEL_SIZE=1
 MAX_MODEL_LEN=2048
 DOWNLOAD_DIR=/mnt/disks/persist