diff --git a/.buildkite/scripts/tpu/config_v6e_1.env b/.buildkite/scripts/tpu/config_v6e_1.env index 441758647347..03ec116f698d 100644 --- a/.buildkite/scripts/tpu/config_v6e_1.env +++ b/.buildkite/scripts/tpu/config_v6e_1.env @@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu # vllm config MODEL=meta-llama/Llama-3.1-8B-Instruct -MAX_NUM_SEQS=512 -MAX_NUM_BATCHED_TOKENS=512 +MAX_NUM_SEQS=256 +MAX_NUM_BATCHED_TOKENS=1024 TENSOR_PARALLEL_SIZE=1 MAX_MODEL_LEN=2048 DOWNLOAD_DIR=/mnt/disks/persist