vllm/.buildkite/scripts/tpu/quantized_v6e_1.env
Siyuan Liu 4b29d2784b
[CI][TPU] Fix docker clean up (#22271)
Signed-off-by: Siyuan Liu <lsiyuan@google.com>
2025-08-05 23:54:56 +00:00

15 lines
313 B
Bash

# Environment config
TEST_NAME=llama8bw8a8
CONTAINER_NAME=tpu-test
# vllm config
MODEL=RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
MAX_NUM_SEQS=128
MAX_NUM_BATCHED_TOKENS=1024
TENSOR_PARALLEL_SIZE=1
MAX_MODEL_LEN=2048
DOWNLOAD_DIR=/mnt/disks/persist
EXPECTED_THROUGHPUT=10.0
INPUT_LEN=1800
OUTPUT_LEN=128