[Bugfix][V1][P/D] Fix the issue of occasional garbled output for P2pNcclConnector (#20263)

Signed-off-by: Abatom <abzhonghua@gmail.com>
This commit is contained in:
Zhonghua Deng 2025-07-01 07:45:14 +08:00 committed by GitHub
parent 97d9524fe9
commit ded1fb635b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -310,10 +310,11 @@ class P2pNcclEngine:
elif data["cmd"] == "PUT":
tensor_id = data["tensor_id"]
try:
tensor = torch.empty(data["shape"],
dtype=getattr(
torch, data["dtype"]),
device=self.device)
with torch.cuda.stream(self.recv_stream):
tensor = torch.empty(data["shape"],
dtype=getattr(
torch, data["dtype"]),
device=self.device)
self.router_socket.send_multipart(
[remote_address, b"0"])
comm, rank = self.comms[remote_address.decode()]