mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 22:05:44 +08:00
Update test case parameter to have the throughput above 8.0 (#19994)
Signed-off-by: Qiliang Cui <derrhein@gmail.com>
This commit is contained in:
parent
33d5e29be9
commit
a738dbb2a1
@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu
|
||||
|
||||
# vllm config
|
||||
MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||
MAX_NUM_SEQS=512
|
||||
MAX_NUM_BATCHED_TOKENS=512
|
||||
MAX_NUM_SEQS=256
|
||||
MAX_NUM_BATCHED_TOKENS=1024
|
||||
TENSOR_PARALLEL_SIZE=1
|
||||
MAX_MODEL_LEN=2048
|
||||
DOWNLOAD_DIR=/mnt/disks/persist
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user