From df866cfebf8beb5c9fe33d3409f4efc72d8881c6 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith
Date: Thu, 10 Jul 2025 18:27:59 -0400
Subject: [PATCH] Move the communication-buffer barrier to the world group

Signed-off-by: Tyler Michael Smith
---
 vllm/distributed/parallel_state.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index d7bb4f3aeef15..996b8147f2041 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -782,7 +782,6 @@ class GroupCoordinator:
 
     def prepare_communication_buffer_for_model(self, model: torch.nn.Module):
         if self.device_communicator is not None:
-            torch.distributed.barrier(self.device_communicator)
             self.device_communicator.prepare_communication_buffer_for_model(
                 model)
 
@@ -1116,6 +1115,7 @@ def prepare_communication_buffer_for_model(model: torch.nn.Module):
     MoE all2all (DeepEP) usually allocate the communication buffer based
     on the model shape for optimal performance.
     """
+    get_world_group().barrier()
     if _TP is not None:
         _TP.prepare_communication_buffer_for_model(model)
     if _PP is not None:
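
Taken together, the two hunks replace a per-group barrier (which passed a device
communicator object to torch.distributed.barrier, whose first parameter is an
optional process group) with a single barrier on the world group before any
group-local buffer allocation runs. Below is a minimal sketch of that ordering
in plain torch.distributed; the `prepare` function and the launch line are
illustrative stand-ins, not vllm's API, and the _TP/_PP group plumbing is
elided.

    # sketch.py -- a minimal sketch of the barrier placement after this patch.
    import torch
    import torch.distributed as dist

    def prepare(model: torch.nn.Module) -> None:
        # Synchronize every rank once on the default (world) process group
        # *before* any per-group buffer allocation; dist.barrier() with no
        # group argument uses the world group, mirroring
        # get_world_group().barrier() in the patch.
        dist.barrier()
        # ...per-group allocation (e.g. TP/PP communicators) would follow here.

    if __name__ == "__main__":
        # Example launch: torchrun --nproc-per-node=2 sketch.py
        dist.init_process_group(backend="gloo")
        prepare(torch.nn.Linear(8, 8))
        dist.destroy_process_group()

Barriering once at the module-level entry point, rather than inside each
GroupCoordinator, presumably keeps all ranks aligned before DeepEP-style
model-shaped allocations begin, without repeating the synchronization per group.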