mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 17:42:21 +08:00
Improve flexibility of auto_tune.sh execution. (#23766)
Signed-off-by: Anthony Su <50185138+anthonsu@users.noreply.github.com> Signed-off-by: anthonsu <50185138+anthonsu@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
51d5e9be7d
commit
04f3c35cff
@ -31,6 +31,12 @@ cd vllm
|
|||||||
|
|
||||||
You must set the following variables at the top of the script before execution.
|
You must set the following variables at the top of the script before execution.
|
||||||
|
|
||||||
|
Note: You can also override the default values below via environment variables when running the script.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
MODEL=meta-llama/Llama-3.3-70B-Instruct SYSTEM=TPU TP=8 DOWNLOAD_DIR='' INPUT_LEN=128 OUTPUT_LEN=2048 MAX_MODEL_LEN=2300 MIN_CACHE_HIT_PCT=0 MAX_LATENCY_ALLOWED_MS=100000000000 NUM_SEQS_LIST="128 256" NUM_BATCHED_TOKENS_LIST="1024 2048 4096" VLLM_LOGGING_LEVEL=DEBUG bash auto_tune.sh
|
||||||
|
```
|
||||||
|
|
||||||
| Variable | Description | Example Value |
|
| Variable | Description | Example Value |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| `BASE` | **Required.** The absolute path to the parent directory of your vLLM repository directory. | `"$HOME"` |
|
| `BASE` | **Required.** The absolute path to the parent directory of your vLLM repository directory. | `"$HOME"` |
|
||||||
|
|||||||
@ -5,25 +5,41 @@
|
|||||||
|
|
||||||
TAG=$(date +"%Y_%m_%d_%H_%M")
|
TAG=$(date +"%Y_%m_%d_%H_%M")
|
||||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
BASE="$SCRIPT_DIR/../../.."
|
VLLM_LOGGING_LEVEL=${VLLM_LOGGING_LEVEL:-INFO}
|
||||||
MODEL="meta-llama/Llama-3.1-8B-Instruct"
|
BASE=${BASE:-"$SCRIPT_DIR/../../.."}
|
||||||
SYSTEM="TPU"
|
MODEL=${MODEL:-"meta-llama/Llama-3.1-8B-Instruct"}
|
||||||
TP=1
|
SYSTEM=${SYSTEM:-"TPU"}
|
||||||
DOWNLOAD_DIR=""
|
TP=${TP:-1}
|
||||||
INPUT_LEN=4000
|
DOWNLOAD_DIR=${DOWNLOAD_DIR:-""}
|
||||||
OUTPUT_LEN=16
|
INPUT_LEN=${INPUT_LEN:-4000}
|
||||||
MAX_MODEL_LEN=4096
|
OUTPUT_LEN=${OUTPUT_LEN:-16}
|
||||||
MIN_CACHE_HIT_PCT=0
|
MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096}
|
||||||
MAX_LATENCY_ALLOWED_MS=100000000000
|
MIN_CACHE_HIT_PCT=${MIN_CACHE_HIT_PCT:-0}
|
||||||
NUM_SEQS_LIST="128 256"
|
MAX_LATENCY_ALLOWED_MS=${MAX_LATENCY_ALLOWED_MS:-100000000000}
|
||||||
NUM_BATCHED_TOKENS_LIST="512 1024 2048 4096"
|
NUM_SEQS_LIST=${NUM_SEQS_LIST:-"128 256"}
|
||||||
|
NUM_BATCHED_TOKENS_LIST=${NUM_BATCHED_TOKENS_LIST:-"512 1024 2048 4096"}
|
||||||
|
|
||||||
LOG_FOLDER="$BASE/auto-benchmark/$TAG"
|
LOG_FOLDER="$BASE/auto-benchmark/$TAG"
|
||||||
RESULT="$LOG_FOLDER/result.txt"
|
RESULT="$LOG_FOLDER/result.txt"
|
||||||
PROFILE_PATH="$LOG_FOLDER/profile"
|
PROFILE_PATH="$LOG_FOLDER/profile"
|
||||||
|
|
||||||
echo "result file: $RESULT"
|
echo "====================== AUTO TUNE PARAMETERS ===================="
|
||||||
echo "model: $MODEL"
|
echo "SCRIPT_DIR=$SCRIPT_DIR"
|
||||||
|
echo "BASE=$BASE"
|
||||||
|
echo "MODEL=$MODEL"
|
||||||
|
echo "SYSTEM=$SYSTEM"
|
||||||
|
echo "TP=$TP"
|
||||||
|
echo "DOWNLOAD_DIR=$DOWNLOAD_DIR"
|
||||||
|
echo "INPUT_LEN=$INPUT_LEN"
|
||||||
|
echo "OUTPUT_LEN=$OUTPUT_LEN"
|
||||||
|
echo "MAX_MODEL_LEN=$MAX_MODEL_LEN"
|
||||||
|
echo "MIN_CACHE_HIT_PCT=$MIN_CACHE_HIT_PCT"
|
||||||
|
echo "MAX_LATENCY_ALLOWED_MS=$MAX_LATENCY_ALLOWED_MS"
|
||||||
|
echo "NUM_SEQS_LIST=$NUM_SEQS_LIST"
|
||||||
|
echo "NUM_BATCHED_TOKENS_LIST=$NUM_BATCHED_TOKENS_LIST"
|
||||||
|
echo "VLLM_LOGGING_LEVEL=$VLLM_LOGGING_LEVEL"
|
||||||
|
echo "RESULT_FILE=$RESULT"
|
||||||
|
# Closing banner for the parameter dump; wording matches the opening banner.
echo "====================== AUTO TUNE PARAMETERS ===================="
|
||||||
|
|
||||||
rm -rf $LOG_FOLDER
|
# Clear any previous run's output. The expansion is quoted so a BASE path
# containing spaces or glob characters cannot split into extra rm targets;
# `--` stops option parsing in case the path begins with a dash.
rm -rf -- "$LOG_FOLDER"
|
||||||
rm -rf $PROFILE_PATH
|
# Remove any stale profile output. Quoted so a path with spaces or glob
# characters is passed to rm as a single operand; `--` stops option parsing.
rm -rf -- "$PROFILE_PATH"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user