From 93f71673ce1a6cd4ac6217c6ca8f7a74c920bcc0 Mon Sep 17 00:00:00 2001
From: "Li, Jiang"
Date: Thu, 22 May 2025 22:35:00 +0800
Subject: [PATCH] [BugFix][CPU] Fix x86 SHM distributed module initialization (#18536)

Signed-off-by: jiang.li
---
 vllm/distributed/device_communicators/cpu_communicator.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/distributed/device_communicators/cpu_communicator.py b/vllm/distributed/device_communicators/cpu_communicator.py
index d4b34900b951..c04218cb9f39 100644
--- a/vllm/distributed/device_communicators/cpu_communicator.py
+++ b/vllm/distributed/device_communicators/cpu_communicator.py
@@ -22,8 +22,10 @@ class CpuCommunicator(DeviceCommunicatorBase):
         super().__init__(cpu_group, device, device_group, unique_name)
         self.dist_module = torch.distributed
 
-        if (current_platform.get_cpu_architecture() == CpuArchEnum.X86) \
-            and hasattr(torch.ops._C, "init_shm_manager"):
+        if (current_platform.get_cpu_architecture()
+                == CpuArchEnum.X86) and hasattr(
+                    torch.ops._C,
+                    "init_shm_manager") and unique_name.startswith("tp"):
             self.dist_module = _CPUSHMDistributed(self)
 
     def all_reduce(self, input_):
@@ -96,6 +98,8 @@ class _CPUSHMDistributed:
 
     def __init__(self, communicator: CpuCommunicator):
         instance_identifier = os.environ["VLLM_DIST_IDENT"]
+        unique_name = communicator.unique_name
+        instance_identifier = f"{instance_identifier}-{unique_name}"
         self.communicator = communicator
 
         group_ranks = [str(rank) for rank in self.communicator.ranks]