From 173c8a95205677ad68c261fb035de7f7aa08033d Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Thu, 2 Oct 2025 23:32:38 -0400 Subject: [PATCH] [CI/Build] Conditionally register cutlass_fp4_group_mm to fix building on Hopper (#26138) Signed-off-by: mgoin Signed-off-by: yewentao256 --- csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu | 6 ++++++ csrc/torch_bindings.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu b/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu index 2c8df6144bf4d..5b007e5ea3283 100644 --- a/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu +++ b/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "core/registration.h" + #include #include @@ -418,3 +420,7 @@ void cutlass_fp4_group_mm( "12.8 or above."); #endif } + +TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) { + m.impl("cutlass_fp4_group_mm", &cutlass_fp4_group_mm); +} diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp index ebd28e7350886..64a345eb66cc4 100644 --- a/csrc/torch_bindings.cpp +++ b/csrc/torch_bindings.cpp @@ -397,7 +397,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { " Tensor a_blockscale, Tensor b_blockscales, Tensor alphas," " Tensor problem_sizes, Tensor expert_offsets, Tensor sf_offsets) -> ()", {stride_tag}); - ops.impl("cutlass_fp4_group_mm", torch::kCUDA, &cutlass_fp4_group_mm); + // conditionally compiled so impl registration is in source file // CUTLASS w8a8 GEMM, supporting symmetric per-tensor or per-row/column // quantization, as well as bias