From 9351f91be96c58167631e43feb807a78cf2f0340 Mon Sep 17 00:00:00 2001 From: TY-AMD Date: Tue, 8 Apr 2025 20:10:26 +0800 Subject: [PATCH] [BugFix][ROCm] Fix GGUF MoE Dispatch Block_Dim for ROCm (#16247) Signed-off-by: Tianyuan Wu --- csrc/quantization/gguf/moe.cuh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/csrc/quantization/gguf/moe.cuh b/csrc/quantization/gguf/moe.cuh index c10c59d7a38a..df9b84abcc13 100644 --- a/csrc/quantization/gguf/moe.cuh +++ b/csrc/quantization/gguf/moe.cuh @@ -129,7 +129,7 @@ static __device__ __forceinline__ void moe_q( } #if defined(USE_ROCM) - #define MOE_X_Q4_0 64 + #define MOE_X_Q4_0 8 #define MOE_Y_Q4_0 128 #define NWARPS_Q4_0 8 #else @@ -190,7 +190,7 @@ static void ggml_moe_q4_0_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q4_1 64 + #define MOE_X_Q4_1 8 #define MOE_Y_Q4_1 128 #define NWARPS_Q4_1 8 #else @@ -251,7 +251,7 @@ static void ggml_moe_q4_1_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q5_0 64 + #define MOE_X_Q5_0 8 #define MOE_Y_Q5_0 128 #define NWARPS_Q5_0 8 #else @@ -312,7 +312,7 @@ static void ggml_moe_q5_0_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q5_1 64 + #define MOE_X_Q5_1 8 #define MOE_Y_Q5_1 128 #define NWARPS_Q5_1 8 #else @@ -373,7 +373,7 @@ static void ggml_moe_q5_1_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q8_0 64 + #define MOE_X_Q8_0 8 #define MOE_Y_Q8_0 128 #define NWARPS_Q8_0 8 #else @@ -434,7 +434,7 @@ static void ggml_moe_q8_0_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q2_K 64 + #define MOE_X_Q2_K 8 #define MOE_Y_Q2_K 128 #define NWARPS_Q2_K 8 #else @@ -495,7 +495,7 @@ static void ggml_moe_q2_K_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q3_K 64 + #define MOE_X_Q3_K 8 #define MOE_Y_Q3_K 128 #define NWARPS_Q3_K 8 #else @@ -556,7 +556,7 @@ static void ggml_moe_q3_K_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q4_K 64 + #define MOE_X_Q4_K 8 #define MOE_Y_Q4_K 128 #define NWARPS_Q4_K 8 #else @@ -617,7 +617,7 @@ static void ggml_moe_q4_K_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q5_K 64 + #define MOE_X_Q5_K 8 #define MOE_Y_Q5_K 128 #define NWARPS_Q5_K 8 #else @@ -678,7 +678,7 @@ static void ggml_moe_q5_K_q8_1_cuda( } #if defined(USE_ROCM) - #define MOE_X_Q6_K 64 + #define MOE_X_Q6_K 8 #define MOE_Y_Q6_K 128 #define NWARPS_Q6_K 8 #else