From 0023cd2b9dfe37ee298f9dba8502c5d3c0a1cbbb Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Date: Thu, 20 Feb 2025 02:05:00 -0500
Subject: [PATCH] [ROCm] MI300A compile targets deprecation (#13560)

---
 CMakeLists.txt                              | 2 +-
 csrc/quantization/fp8/amd/hip_float8_impl.h | 3 +--
 csrc/rocm/attention.cu                      | 3 +--
 vllm/attention/backends/rocm_flash_attn.py  | 3 +--
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e8f7adf6ea9..cd1c2c9015da 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,7 +34,7 @@ set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12")
 set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0")
 
 # Supported AMD GPU architectures.
-set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101")
+set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
 
 #
 # Supported/expected torch versions for CUDA/ROCm.
diff --git a/csrc/quantization/fp8/amd/hip_float8_impl.h b/csrc/quantization/fp8/amd/hip_float8_impl.h
index 90251c353953..8b9cd26f2f76 100644
--- a/csrc/quantization/fp8/amd/hip_float8_impl.h
+++ b/csrc/quantization/fp8/amd/hip_float8_impl.h
@@ -1,7 +1,6 @@
 #pragma once
 
-#if defined(__HIPCC__) && \
-    (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
+#if defined(__HIPCC__) && defined(__gfx942__)
 #define __HIP__MI300__
 #endif
 
diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu
index 366b3cdc23aa..82f7104a9e5a 100644
--- a/csrc/rocm/attention.cu
+++ b/csrc/rocm/attention.cu
@@ -24,8 +24,7 @@
 #include "../attention/dtype_fp8.cuh"
 #include "../quantization/fp8/amd/quant_utils.cuh"
 
-#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx940__) || \
-                           defined(__gfx941__) || defined(__gfx942__))
+#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx942__))
 #define __HIP__MI300_MI250__
 #endif
 
diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py
index 02bff57a62b7..f49b37842d9b 100644
--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -25,8 +25,7 @@ logger = init_logger(__name__)
 
 _PARTITION_SIZE_ROCM = 512
 _GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
 _ON_NAVI = "gfx1" in _GPU_ARCH
-_ON_MI250_MI300 = any(arch in _GPU_ARCH
-                      for arch in ["gfx90a", "gfx940", "gfx941", "gfx942"])
+_ON_MI250_MI300 = any(arch in _GPU_ARCH for arch in ["gfx90a", "gfx942"])
 
 class ROCmFlashAttentionBackend(AttentionBackend):