mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 06:35:00 +08:00
Update test case parameter to have the throughput above 8.0 (#19994)
Signed-off-by: Qiliang Cui <derrhein@gmail.com>
This commit is contained in:
parent
33d5e29be9
commit
a738dbb2a1
@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu
|
|||||||
|
|
||||||
# vllm config
|
# vllm config
|
||||||
MODEL=meta-llama/Llama-3.1-8B-Instruct
|
MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||||
MAX_NUM_SEQS=512
|
MAX_NUM_SEQS=256
|
||||||
MAX_NUM_BATCHED_TOKENS=512
|
MAX_NUM_BATCHED_TOKENS=1024
|
||||||
TENSOR_PARALLEL_SIZE=1
|
TENSOR_PARALLEL_SIZE=1
|
||||||
MAX_MODEL_LEN=2048
|
MAX_MODEL_LEN=2048
|
||||||
DOWNLOAD_DIR=/mnt/disks/persist
|
DOWNLOAD_DIR=/mnt/disks/persist
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user