From 8a57872b2ac9b01004ae1d3a3a689de218ea5be5 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Mon, 2 Jun 2025 23:36:51 -0400 Subject: [PATCH] [Bugfix][EP+DP] Use pplx-kernel internode instead of intranode (#19034) Signed-off-by: Tyler Michael Smith Signed-off-by: Tyler Michael Smith --- vllm/distributed/device_communicators/all2all.py | 4 ++++ vllm/model_executor/layers/fused_moe/layer.py | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py index a250ec89cd5ba..7177754a37115 100644 --- a/vllm/distributed/device_communicators/all2all.py +++ b/vllm/distributed/device_communicators/all2all.py @@ -83,6 +83,10 @@ class PPLXAll2AllManager(All2AllManagerBase): assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels." # noqa super().__init__(cpu_group) + # TODO(tms): Disable pplx-a2a intranode as it fails with the error: + # failed: cuda error /app/pplx/csrc/all_to_all/intranode.cpp:84 'invalid resource handle' # noqa + self.internode = True + if self.internode: # inter-node communication needs nvshmem, # intra-node communication uses p2p mapping directly diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index af7b98e14c6c8..1e193c909f617 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -269,9 +269,13 @@ class FusedMoEMethodBase(QuantizeMethodBase): hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else ( (moe.hidden_dim + moe.block_size - 1) // moe.block_size * torch.float32.itemsize)), - group_name=all2all_manager.cpu_group.group_name, ) + # Intranode pplx a2a takes a group name while internode does not. + if not all2all_manager.internode: + all_to_all_args[ + "group_name"] = all2all_manager.cpu_group.group_name + handle = all2all_manager.get_handle(all_to_all_args) prepare_finalize = PplxPrepareAndFinalize(