[TPU] Add a case to cover RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8 (#20385)

Signed-off-by: Qiliang Cui <derrhein@gmail.com>
This commit is contained in:
QiliangCui 2025-07-02 23:46:41 -07:00 committed by GitHub
parent 0ec3779df7
commit 4ff61ababa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -0,0 +1,14 @@
# Environment config
# Identifies this TPU test case and the container name it is associated with.
# NOTE(review): the script/tool that reads this file is not visible here; keep
# every entry as a plain KEY=value line and put comments on their own lines.
TEST_NAME=llama8bw8a8
CONTAINER_NAME=vllm-tpu
# vllm config
# Model under test (w8a8-quantized Llama 3.1 8B Instruct) and the vLLM settings
# used for it.
MODEL=RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
MAX_NUM_SEQS=128
MAX_NUM_BATCHED_TOKENS=1024
TENSOR_PARALLEL_SIZE=1
MAX_MODEL_LEN=2048
# Presumably the directory where model files are downloaded/cached — confirm
# against the consuming script.
DOWNLOAD_DIR=/mnt/disks/persist
# Presumably the throughput the test run is compared against; the unit is not
# stated in this file — TODO confirm.
EXPECTED_THROUGHPUT=10.0
# INPUT_LEN + OUTPUT_LEN = 1928, which stays within MAX_MODEL_LEN (2048).
INPUT_LEN=1800
OUTPUT_LEN=128