Signed-off-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
Robert Shaw 2025-07-13 15:46:20 +00:00
parent 2884e7f861
commit 6ac7b874b1

View File

@ -2,20 +2,20 @@
# Directory interpolated into the `benchmark` recipe's script path as
# `{{vllm-directory}}/benchmarks/benchmark_serving.py` (presumably a local
# vLLM source checkout -- machine-specific, adjust per host).
# NOTE(review): the value already ends in "/", so that interpolation yields a
# double slash in the resulting path.
vllm-directory := "/home/rshaw/vllm/"
# Serve MODEL with `vllm serve`, passing SIZE as --data-parallel-size and
# enabling expert parallelism; --disable-log-requests is also passed.
#   MODEL: model identifier handed to `vllm serve`
#   SIZE:  value for --data-parallel-size
launch_dp_ep MODEL SIZE:
    vllm serve {{MODEL}} --data-parallel-size {{SIZE}} --enable-expert-parallel --disable-log-requests
# Serve MODEL with `vllm serve`, passing SIZE as --tensor-parallel-size;
# --disable-log-requests is also passed.
#   MODEL: model identifier handed to `vllm serve`
#   SIZE:  value for --tensor-parallel-size
launch_tp MODEL SIZE:
    vllm serve {{MODEL}} --tensor-parallel-size {{SIZE}} --disable-log-requests
# Run the gsm8k task with lm_eval's local-completions model against the
# completions endpoint at http://127.0.0.1:8000, using 100 concurrent requests
# and tokenized_requests=False.
#   MODEL: model name passed through --model_args as `model=`
# NOTE(review): port 8000 is presumably where the server started by the launch
# recipes listens -- confirm if `vllm serve` is ever given an explicit --port.
eval MODEL:
    lm_eval --model local-completions --tasks gsm8k \
        --model_args model={{MODEL}},base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=100,tokenized_requests=False
# Run benchmarks/benchmark_serving.py from {{vllm-directory}} for MODEL using
# the "random" dataset with --random-input-len 1000 and --random-output-len 100.
#   MODEL:       model identifier passed as --model
#   NUM_PROMPTS: value for --num-prompts
# The seed is the current Unix time (`date +%s`), so it differs between runs
# started in different seconds.
benchmark MODEL NUM_PROMPTS:
    python {{vllm-directory}}/benchmarks/benchmark_serving.py \
        --model {{MODEL}} \
        --dataset-name random \
        --random-input-len 1000 \
        --random-output-len 100 \
        --num-prompts {{NUM_PROMPTS}} \
        --seed $(date +%s)