Update test case parameter to have the throughput above 8.0 (#19994)

Signed-off-by: Qiliang Cui <derrhein@gmail.com>
This commit is contained in:
QiliangCui 2025-06-23 17:18:10 -07:00 committed by GitHub
parent 33d5e29be9
commit a738dbb2a1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu
# vllm config # vllm config
MODEL=meta-llama/Llama-3.1-8B-Instruct MODEL=meta-llama/Llama-3.1-8B-Instruct
MAX_NUM_SEQS=512 MAX_NUM_SEQS=256
MAX_NUM_BATCHED_TOKENS=512 MAX_NUM_BATCHED_TOKENS=1024
TENSOR_PARALLEL_SIZE=1 TENSOR_PARALLEL_SIZE=1
MAX_MODEL_LEN=2048 MAX_MODEL_LEN=2048
DOWNLOAD_DIR=/mnt/disks/persist DOWNLOAD_DIR=/mnt/disks/persist