diff --git a/csrc/moe/topk_softmax_kernels.cu b/csrc/moe/topk_softmax_kernels.cu index ea4ff67ef3e40..064b76c9cd427 100644 --- a/csrc/moe/topk_softmax_kernels.cu +++ b/csrc/moe/topk_softmax_kernels.cu @@ -20,7 +20,6 @@ #include #include #include "../cuda_compat.h" -#include #ifndef USE_ROCM #include @@ -63,7 +62,7 @@ __launch_bounds__(TPB) __global__ const int thread_row_offset = blockIdx.x * num_cols; - cuda::std::plus sum; + cub::Sum sum; float threadData(-FLT_MAX); // Don't touch finished rows.