diff --git a/.buildkite/scripts/tpu/quantized_v6e_1.env b/.buildkite/scripts/tpu/quantized_v6e_1.env
new file mode 100644
index 0000000000000..bab34b3be3b9a
--- /dev/null
+++ b/.buildkite/scripts/tpu/quantized_v6e_1.env
@@ -0,0 +1,14 @@
+# Environment config: name of this test and of the container it uses.
+TEST_NAME=llama8bw8a8
+CONTAINER_NAME=vllm-tpu
+
+# vLLM config: model, sequence/token limits, parallelism, download dir, expected throughput, and input/output lengths. INPUT_LEN + OUTPUT_LEN (1928) stays below MAX_MODEL_LEN (2048) - presumably intentional; confirm against the consuming script.
+MODEL=RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
+MAX_NUM_SEQS=128
+MAX_NUM_BATCHED_TOKENS=1024
+TENSOR_PARALLEL_SIZE=1
+MAX_MODEL_LEN=2048
+DOWNLOAD_DIR=/mnt/disks/persist
+EXPECTED_THROUGHPUT=10.0
+INPUT_LEN=1800
+OUTPUT_LEN=128