From 571e8dd65e2a286c621f7552d1d336ac3fe08b4a Mon Sep 17 00:00:00 2001
From: Yang Fan
Date: Tue, 22 Apr 2025 20:23:17 +0800
Subject: [PATCH] [Bugfix] Fix distributed bug again in Qwen2.5-VL &
 Qwen2.5-Omni (#16974)

Signed-off-by: fyabc
---
 vllm/model_executor/models/qwen2_5_vl.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 30980316ecfc7..0ab55411bad4f 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -198,8 +198,11 @@ class Qwen2_5_VisionMLP(nn.Module):
 
 def all_gather_interleave(local_tensor, hidden_size: int, tp_size: int):
     """All-gather the input tensor interleavely across model parallel group."""
+    import torch.distributed as dist
     gathered_tensors = [torch.zeros_like(local_tensor) for _ in range(tp_size)]
-    parallel_state.get_tp_group().all_gather(gathered_tensors, local_tensor)
+    dist.all_gather(gathered_tensors,
+                    local_tensor,
+                    group=parallel_state.get_tp_group().device_group)
 
     gathered_tensors_split = [
         torch.split(tensor, hidden_size // tp_size, -1)
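
Note (not part of the patch): a minimal sketch of the torch.distributed.all_gather
pattern the fix switches to: pre-allocate one output buffer per rank, then gather
every rank's shard into that list, passing an explicit process group. In the patch
the group is vLLM's tensor-parallel group via parallel_state.get_tp_group().device_group;
the sketch below has no vLLM setup, so it uses the default WORLD group, a single-rank
"gloo" process group, and a hypothetical demo_all_gather() helper purely for illustration.

import os

import torch
import torch.distributed as dist


def demo_all_gather() -> None:
    # Hypothetical single-rank setup so the script runs without torchrun.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group(backend="gloo", rank=0, world_size=1)

    world_size = dist.get_world_size()
    local_tensor = torch.arange(4, dtype=torch.float32)

    # Same shape as all_gather_interleave: one zeroed buffer per rank,
    # filled in place by the collective.
    gathered = [torch.zeros_like(local_tensor) for _ in range(world_size)]
    dist.all_gather(gathered, local_tensor, group=dist.group.WORLD)

    # With world_size == 1 this simply reproduces local_tensor.
    print(torch.cat(gathered, dim=-1))
    dist.destroy_process_group()


if __name__ == "__main__":
    demo_all_gather()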