From 850dafea92fde38e62385981ad35f52526f233b4 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tysmith@redhat.com>
Date: Fri, 20 Jun 2025 19:57:07 +0000
Subject: [PATCH] Use all SMs for DeepEP all2all communication

Signed-off-by: Tyler Michael Smith <tysmith@redhat.com>
---
 vllm/distributed/device_communicators/all2all.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py
index 35f2fd0ba9e22..c8c373b8adc9b 100644
--- a/vllm/distributed/device_communicators/all2all.py
+++ b/vllm/distributed/device_communicators/all2all.py
@@ -138,9 +138,11 @@ class DeepEPAll2AllManagerBase(All2AllManagerBase):
         super().__init__(cpu_group)
         self.handle_cache = Cache()
 
-        # This is the DeepEP default. Stick to it till we can establish
-        # reasonable defaults based on profiling.
-        self.num_sms = 20
+        # Use all SMs on this device for all2all communication.
+        # TODO: this will need to be adjusted for dual-batch overlap.
+        device = self.dp_group.device
+        props = torch.cuda.get_device_properties(device)
+        self.num_sms = props.multi_processor_count
 
     def get_handle(self, kwargs):
         raise NotImplementedError