diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index f31e4766bfda..48a82d30193e 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -1013,6 +1013,7 @@ def initialize_model_parallel(
             parallelism.
         pipeline_model_parallel_size: number of GPUs used for pipeline model
            parallelism.
+        backend: name of torch distributed communication backend.
 
     Let's say we have a total of 8 GPUs denoted by g0 ... g7 and we
     use 2 GPUs to parallelize the model tensor, and 4 GPUs to parallelize
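
For reference, a minimal usage sketch (not part of the diff) of how initialize_model_parallel might be called with the newly documented backend argument. It assumes torch.distributed is already initialized across 8 ranks and mirrors the 2x4 split from the docstring example; the "nccl" value is only an illustrative choice.

# Illustrative sketch, not from the PR: assumes the distributed environment
# has already been set up elsewhere (8 ranks total, one per GPU).
from vllm.distributed.parallel_state import initialize_model_parallel

initialize_model_parallel(
    tensor_model_parallel_size=2,    # 4 tensor-parallel groups of 2 GPUs each
    pipeline_model_parallel_size=4,  # 2 pipeline-parallel groups of 4 GPUs each
    backend="nccl",                  # newly documented argument; value assumed
)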