# Test/benchmark environment configuration for a vLLM TPU run.
# Sourced by a driver script; defines only UPPER_SNAKE_CASE variables,
# no commands are executed here.

# Environment config
TEST_NAME=llama8bw8a8        # test identifier (Llama 8B, W8A8 quantization)
CONTAINER_NAME=tpu-test      # container the test runs in

# vllm config
MODEL=RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
MAX_NUM_SEQS=128             # max concurrent sequences in a batch
MAX_NUM_BATCHED_TOKENS=1024  # scheduler token budget per step
TENSOR_PARALLEL_SIZE=1
MAX_MODEL_LEN=2048           # context length (must cover INPUT_LEN + OUTPUT_LEN)
DOWNLOAD_DIR=/mnt/disks/persist
EXPECTED_THROUGHPUT=8.7      # pass/fail threshold — presumably req/s; confirm against harness
INPUT_LEN=1800               # prompt length in tokens
OUTPUT_LEN=128               # generated tokens per request