From ef608c37a7cf106fcd48aee24ec44248d2a6665b Mon Sep 17 00:00:00 2001 From: Ilya Markov Date: Fri, 4 Apr 2025 18:39:08 +0200 Subject: [PATCH] [Distributed] [ROCM] Fix custom allreduce enable checks (#16010) Signed-off-by: ilmarkov Co-authored-by: ilmarkov --- vllm/config.py | 7 +++---- vllm/platforms/cuda.py | 4 ++++ vllm/platforms/interface.py | 7 +++++++ vllm/platforms/rocm.py | 7 +++++++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 92e887e08639f..1aadf2c25b430 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1619,13 +1619,12 @@ class ParallelConfig: if self.use_ray: from vllm.executor import ray_utils ray_utils.assert_ray_available() - device_capability = current_platform.get_device_capability() - if (current_platform.is_rocm() and device_capability is not None - and device_capability < (9, 4)): + + if not current_platform.use_custom_allreduce(): self.disable_custom_all_reduce = True logger.info( "Disabled the custom all-reduce kernel because it is not " - "supported on AMD GPUs older than MI300X.") + "supported on the current platform.") if self.ray_workers_use_nsight and not self.use_ray: raise ValueError("Unable to use nsight profiling unless workers " "run with Ray.") diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 28505fca10dfa..0576022be448b 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -308,6 +308,10 @@ class CudaPlatformBase(Platform): def supports_v1(cls, model_config: ModelConfig) -> bool: return True + @classmethod + def use_custom_allreduce(cls) -> bool: + return True + # NVML utils # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`, diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 36db70681a199..b6f6029de9c82 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -379,6 +379,13 @@ class Platform: """ return False + @classmethod + def use_custom_allreduce(cls) -> bool: + """ + Return whether custom 
allreduce is supported on the current platform + """ + return False + class UnspecifiedPlatform(Platform): _enum = PlatformEnum.UNSPECIFIED diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 0bedd80e5ecf1..d18b7c26f7ec5 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -302,3 +302,10 @@ class RocmPlatform(Platform): def supports_v1(cls, model_config: ModelConfig) -> bool: # V1 support on AMD gpus is experimental return True + + @classmethod + def use_custom_allreduce(cls) -> bool: + # We only enable custom allreduce for MI300 series + gcn_arch = torch.cuda.get_device_properties(0).gcnArchName + supported_archs = ['gfx94'] + return any(gfx in gcn_arch for gfx in supported_archs)