From 0023cd2b9dfe37ee298f9dba8502c5d3c0a1cbbb Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Date: Thu, 20 Feb 2025 02:05:00 -0500
Subject: [PATCH] [ROCm] MI300A compile targets deprecation (#13560)

---
 CMakeLists.txt                              | 2 +-
 csrc/quantization/fp8/amd/hip_float8_impl.h | 3 +--
 csrc/rocm/attention.cu                      | 3 +--
 vllm/attention/backends/rocm_flash_attn.py  | 3 +--
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e8f7adf6ea9..cd1c2c9015da 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,7 +34,7 @@ set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12")
 set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0")
 
 # Supported AMD GPU architectures.
-set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101")
+set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
 
 #
 # Supported/expected torch versions for CUDA/ROCm.
diff --git a/csrc/quantization/fp8/amd/hip_float8_impl.h b/csrc/quantization/fp8/amd/hip_float8_impl.h
index 90251c353953..8b9cd26f2f76 100644
--- a/csrc/quantization/fp8/amd/hip_float8_impl.h
+++ b/csrc/quantization/fp8/amd/hip_float8_impl.h
@@ -1,7 +1,6 @@
 #pragma once
 
-#if defined(__HIPCC__) && \
-    (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
+#if defined(__HIPCC__) && defined(__gfx942__)
 #define __HIP__MI300__
 #endif
 
diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu
index 366b3cdc23aa..82f7104a9e5a 100644
--- a/csrc/rocm/attention.cu
+++ b/csrc/rocm/attention.cu
@@ -24,8 +24,7 @@
 #include "../attention/dtype_fp8.cuh"
 #include "../quantization/fp8/amd/quant_utils.cuh"
 
-#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx940__) || \
-                           defined(__gfx941__) || defined(__gfx942__))
+#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx942__))
 #define __HIP__MI300_MI250__
 #endif
 
diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py
index 02bff57a62b7..f49b37842d9b 100644
--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -25,8 +25,7 @@ logger = init_logger(__name__)
 
 _PARTITION_SIZE_ROCM = 512
 _GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
 _ON_NAVI = "gfx1" in _GPU_ARCH
-_ON_MI250_MI300 = any(arch in _GPU_ARCH
-                      for arch in ["gfx90a", "gfx940", "gfx941", "gfx942"])
+_ON_MI250_MI300 = any(arch in _GPU_ARCH for arch in ["gfx90a", "gfx942"])
 
 class ROCmFlashAttentionBackend(AttentionBackend):