Update test case parameter to have the throughput above 8.0 (#19994)

Signed-off-by: Qiliang Cui <derrhein@gmail.com>
2026-01-29 18:47:14 +08:00 · 2025-06-23 17:18:10 -07:00 · 2025-06-23 17:18:10 -07:00 · a738dbb2a1
commit a738dbb2a1
parent 33d5e29be9
1 changed file with 2 additions and 2 deletions
--- a/.buildkite/scripts/tpu/config_v6e_1.env
+++ b/.buildkite/scripts/tpu/config_v6e_1.env
@@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu

 # vllm config
 MODEL=meta-llama/Llama-3.1-8B-Instruct
-MAX_NUM_SEQS=512
-MAX_NUM_BATCHED_TOKENS=512
+MAX_NUM_SEQS=256
+MAX_NUM_BATCHED_TOKENS=1024
 TENSOR_PARALLEL_SIZE=1
 MAX_MODEL_LEN=2048
 DOWNLOAD_DIR=/mnt/disks/persist