mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 06:44:57 +08:00
15 lines
312 B
Bash
15 lines
312 B
Bash
# Environment config
# Plain KEY=VALUE assignments (unquoted, no whitespace in values).
# Keep comments on their own lines and values unquoted so the file remains
# valid both when sourced by a shell and when read by a strict KEY=VALUE parser.
TEST_NAME=llama8bw8a8
CONTAINER_NAME=tpu-test

# vllm config
# NOTE(review): judging by the names, these are consumed by the script that
# launches the model and runs the benchmark; confirm exact usage there.
MODEL=RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
MAX_NUM_SEQS=128
MAX_NUM_BATCHED_TOKENS=1024
TENSOR_PARALLEL_SIZE=1
MAX_MODEL_LEN=2048
DOWNLOAD_DIR=/mnt/disks/persist
# NOTE(review): the unit of EXPECTED_THROUGHPUT is not stated in this file;
# verify against the consumer that compares against it.
EXPECTED_THROUGHPUT=8.7
INPUT_LEN=1800
OUTPUT_LEN=128