[ROCm] warpSize is being made non constexpr in ROCm 7.0 (#20330)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
2026-07-26 07:37:10 +08:00 · 2025-07-15 14:01:44 -04:00 · 2025-07-15 14:01:44 -04:00 · ed10f3cea1
commit ed10f3cea1
parent b637e9dcb8
4 changed files with 6 additions and 24 deletions
--- a/csrc/attention/attention_kernels.cuh
+++ b/csrc/attention/attention_kernels.cuh
@@ -24,6 +24,7 @@
 #include "attention_dtypes.h"
 #include "attention_utils.cuh"
 #include "cuda_compat.h"
 #ifdef USE_ROCM
  #include <hip/hip_bf16.h>
@@ -33,12 +34,6 @@ typedef __hip_bfloat16 __nv_bfloat16;
  #include "../quantization/fp8/nvidia/quant_utils.cuh"
 #endif
 #ifndef USE_ROCM
  #define WARP_SIZE 32
 #else
  #define WARP_SIZE warpSize
 #endif
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
@@ -670,7 +665,6 @@ __global__ void paged_attention_v2_reduce_kernel(
 }  // namespace vllm
 #undef WARP_SIZE
 #undef MAX
 #undef MIN
 #undef DIVIDE_ROUND_UP
--- a/csrc/attention/paged_attention_v1.cu
+++ b/csrc/attention/paged_attention_v1.cu
@@ -18,12 +18,7 @@
 */
 #include "attention_kernels.cuh"
-
+#include "cuda_compat.h"
 #ifndef USE_ROCM
  #define WARP_SIZE 32
 #else
  #define WARP_SIZE warpSize
 #endif
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -187,7 +182,6 @@ void paged_attention_v1(
                             CALL_V1_LAUNCHER_BLOCK_SIZE)
 }
 #undef WARP_SIZE
 #undef MAX
 #undef MIN
 #undef DIVIDE_ROUND_UP
--- a/csrc/attention/paged_attention_v2.cu
+++ b/csrc/attention/paged_attention_v2.cu
@@ -18,12 +18,7 @@
 */
 #include "attention_kernels.cuh"
-
+#include "cuda_compat.h"
 #ifndef USE_ROCM
  #define WARP_SIZE 32
 #else
  #define WARP_SIZE warpSize
 #endif
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -197,7 +192,6 @@ void paged_attention_v2(
                             CALL_V2_LAUNCHER_BLOCK_SIZE)
 }
 #undef WARP_SIZE
 #undef MAX
 #undef MIN
 #undef DIVIDE_ROUND_UP
--- a/csrc/cuda_compat.h
+++ b/csrc/cuda_compat.h
@@ -4,10 +4,10 @@
  #include <hip/hip_runtime.h>
 #endif
-#ifndef USE_ROCM
+#if defined(USE_ROCM) && defined(__GFX9__)
-  #define WARP_SIZE 32
+  #define WARP_SIZE 64
 #else
-  #define WARP_SIZE warpSize
+  #define WARP_SIZE 32
 #endif
 #ifndef USE_ROCM