# Environment config
# Plain KEY=VALUE settings, one assignment per line. Values are left unquoted
# and comments are kept on their own lines so the file stays valid both when
# sourced by a shell and when read by a simple KEY=VALUE parser.
TEST_NAME=llama8b
CONTAINER_NAME=tpu-test

# vllm config
MODEL=meta-llama/Llama-3.1-8B-Instruct
MAX_NUM_SEQS=256
MAX_NUM_BATCHED_TOKENS=1024
TENSOR_PARALLEL_SIZE=1
MAX_MODEL_LEN=2048
DOWNLOAD_DIR=/mnt/disks/persist
# NOTE(review): the unit and pass/fail rule for EXPECTED_THROUGHPUT are decided
# by whatever script consumes this file — confirm there before changing it.
EXPECTED_THROUGHPUT=8.0
# INPUT_LEN + OUTPUT_LEN = 1928, which is below MAX_MODEL_LEN (2048).
INPUT_LEN=1800
OUTPUT_LEN=128