From 4ff61ababa25f4a519185013c9cce00142341f04 Mon Sep 17 00:00:00 2001
From: QiliangCui
Date: Wed, 2 Jul 2025 23:46:41 -0700
Subject: [PATCH] [TPU] Add a case to cover RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8 (#20385)

Signed-off-by: Qiliang Cui
---
 .buildkite/scripts/tpu/quantized_v6e_1.env | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 .buildkite/scripts/tpu/quantized_v6e_1.env

diff --git a/.buildkite/scripts/tpu/quantized_v6e_1.env b/.buildkite/scripts/tpu/quantized_v6e_1.env
new file mode 100644
index 0000000000000..bab34b3be3b9a
--- /dev/null
+++ b/.buildkite/scripts/tpu/quantized_v6e_1.env
@@ -0,0 +1,14 @@
+# Environment config
+TEST_NAME=llama8bw8a8
+CONTAINER_NAME=vllm-tpu
+
+# vllm config
+MODEL=RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
+MAX_NUM_SEQS=128
+MAX_NUM_BATCHED_TOKENS=1024
+TENSOR_PARALLEL_SIZE=1
+MAX_MODEL_LEN=2048
+DOWNLOAD_DIR=/mnt/disks/persist
+EXPECTED_THROUGHPUT=10.0
+INPUT_LEN=1800
+OUTPUT_LEN=128