mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-05 09:27:03 +08:00
correct LWS deployment yaml (#23104)
Signed-off-by: cberge908 <42270330+cberge908@users.noreply.github.com>
This commit is contained in:
parent
ce30dca5c4
commit
8bd5844989
@@ -22,7 +22,7 @@ Deploy the following yaml file `lws.yaml`
|
||||
metadata:
|
||||
name: vllm
|
||||
spec:
|
||||
-replicas: 2
|
||||
+replicas: 1
|
||||
leaderWorkerTemplate:
|
||||
size: 2
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
@@ -41,7 +41,7 @@ Deploy the following yaml file `lws.yaml`
|
||||
- sh
|
||||
- -c
|
||||
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
-python3 -m vllm.entrypoints.openai.api_server --port 8080 --model meta-llama/Meta-Llama-3.1-405B-Instruct --tensor-parallel-size 8 --pipeline_parallel_size 2"
|
||||
+vllm serve meta-llama/Meta-Llama-3.1-405B-Instruct --port 8080 --tensor-parallel-size 8 --pipeline_parallel_size 2"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "8"
|
||||
@@ -126,8 +126,6 @@ Should get an output similar to this:
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vllm-0 1/1 Running 0 2s
|
||||
vllm-0-1 1/1 Running 0 2s
|
||||
-vllm-1 1/1 Running 0 2s
|
||||
-vllm-1-1 1/1 Running 0 2s
|
||||
```
|
||||
|
||||
Verify that the distributed tensor-parallel inference works:
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
# Example usage:
|
||||
# On the head node machine, start the Ray head node process and run a vLLM server.
|
||||
# ./multi-node-serving.sh leader --ray_port=6379 --ray_cluster_size=<SIZE> [<extra ray args>] && \
|
||||
-# python3 -m vllm.entrypoints.openai.api_server --port 8080 --model meta-llama/Meta-Llama-3.1-405B-Instruct --tensor-parallel-size 8 --pipeline_parallel_size 2
|
||||
+# vllm serve meta-llama/Meta-Llama-3.1-405B-Instruct --port 8080 --tensor-parallel-size 8 --pipeline_parallel_size 2
|
||||
#
|
||||
# On each worker node, start the Ray worker node process.
|
||||
# ./multi-node-serving.sh worker --ray_address=<HEAD_NODE_IP> --ray_port=6379 [<extra ray args>]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user