vllm/examples/online_serving/elastic_ep/serve_deepseek_v2.sh
Cyrus Leung 7e4cd070b0
[V0 Deprecation] Remove VLLM_USE_V1 from docs and scripts (#26336)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-10-07 16:46:44 +08:00

72 lines
2.0 KiB
Bash

#!/bin/bash
HOST="0.0.0.0"
PORT=8006
DATA_PARALLEL_SIZE=4
REDUNDANT_EXPERTS=0
LOCAL_MODEL_PATH="/models/models--deepseek-ai--DeepSeek-V2-Lite/snapshots/604d5664dddd88a0433dbae533b7fe9472482de0"
MODEL_NAME="deepseek-ai/DeepSeek-V2-Lite"
while [[ $# -gt 0 ]]; do
case $1 in
--dp)
DATA_PARALLEL_SIZE="$2"
shift 2
;;
--re)
REDUNDANT_EXPERTS="$2"
shift 2
;;
--host)
HOST="$2"
shift 2
;;
--port)
PORT="$2"
shift 2
;;
--model)
MODEL_NAME="$2"
shift 2
;;
--local-model)
MODEL_NAME=$LOCAL_MODEL_PATH
shift
;;
-h|--help)
echo "Usage: $0 [OPTIONS]"
echo "Options:"
echo " --dp SIZE Set data parallel size (default: 4)"
echo " --re SIZE Set redundant experts (default: 0)"
echo " --host HOST Set host address (default: 0.0.0.0)"
echo " --port PORT Set port number (default: 8006)"
echo " --model MODEL_NAME Set model name or path"
echo " -h, --help Show this help message"
exit 0
;;
*)
echo "Unknown option: $1"
echo "Use -h or --help for usage information"
exit 1
;;
esac
done
echo "Starting vLLM server for $MODEL_NAME with data parallel size: $DATA_PARALLEL_SIZE and redundant experts: $REDUNDANT_EXPERTS"
export RAY_DEDUP_LOGS=0
export VLLM_ALL2ALL_BACKEND="pplx"
export VLLM_USE_DEEP_GEMM=1
vllm serve $MODEL_NAME \
--data-parallel-size $DATA_PARALLEL_SIZE \
--data-parallel-size-local $DATA_PARALLEL_SIZE \
--data-parallel-backend ray \
--enforce-eager \
--enable-expert-parallel \
--enable-eplb \
--num-redundant-experts $REDUNDANT_EXPERTS \
--trust-remote-code \
--host $HOST \
--port $PORT