From 40b86aa05e4458bf28f038666942d620d89c8c3d Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Sun, 6 Jul 2025 00:17:30 -0400 Subject: [PATCH] [BugFix] Fix: ImportError when building on hopper systems (#20513) Signed-off-by: Lucas Wilkinson --- .github/CODEOWNERS | 2 +- csrc/ops.h | 5 ----- .../cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu | 9 ++++++++- csrc/torch_bindings.cpp | 3 +-- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index da7f89747a16d..2acb03d52a67c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -16,7 +16,7 @@ /vllm/lora @jeejeelee /vllm/reasoning @aarnphm /vllm/entrypoints @aarnphm -CMakeLists.txt @tlrmchlsmth +CMakeLists.txt @tlrmchlsmth @LucasWilkinson # Any change to the VllmConfig changes can have a large user-facing impact, # so spam a lot of people diff --git a/csrc/ops.h b/csrc/ops.h index 56e51cc659d86..52c264d64ccad 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -239,11 +239,6 @@ void cutlass_moe_mm( torch::Tensor const& b_strides, torch::Tensor const& c_strides, bool per_act_token, bool per_out_ch); -void cutlass_blockwise_scaled_grouped_mm( - torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b, - const torch::Tensor& scales_a, const torch::Tensor& scales_b, - const torch::Tensor& problem_sizes, const torch::Tensor& expert_offsets); - void cutlass_fp4_group_mm( torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b, const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales, diff --git a/csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu b/csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu index ef57e503b21ae..236d76ed52081 100644 --- a/csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu +++ b/csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu @@ -1,3 +1,5 @@ +#include "core/registration.h" + #include #include @@ -364,4 +366,9 @@ void cutlass_blockwise_scaled_grouped_mm( TORCH_CHECK(false, "Unsupported output tensor type"); } #endif -} \ No newline at end of file +} + +TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) { + m.impl("cutlass_blockwise_scaled_grouped_mm", + &cutlass_blockwise_scaled_grouped_mm); +} diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp index 04329e75db8c3..9414e26196b28 100644 --- a/csrc/torch_bindings.cpp +++ b/csrc/torch_bindings.cpp @@ -399,8 +399,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { "Tensor scales_a, Tensor scales_b, " "Tensor problem_sizes, Tensor expert_offsets) -> ()", {stride_tag}); - ops.impl("cutlass_blockwise_scaled_grouped_mm", torch::kCUDA, - &cutlass_blockwise_scaled_grouped_mm); + // conditionally compiled so impl registration is in source file // cutlass nvfp4 block scaled group GEMM ops.def(