[BugFix] Fix: ImportError when building on hopper systems (#20513)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
2026-07-25 03:17:11 +08:00 · 2025-07-06 00:17:30 -04:00 · 2025-07-06 00:17:30 -04:00 · 40b86aa05e
commit 40b86aa05e
parent 432870829d
4 changed files with 10 additions and 9 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@ -16,7 +16,7 @@
 /vllm/lora @jeejeelee
 /vllm/reasoning @aarnphm
 /vllm/entrypoints @aarnphm
-CMakeLists.txt @tlrmchlsmth
+CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 # Any change to the VllmConfig changes can have a large user-facing impact,
 # so spam a lot of people
--- a/csrc/ops.h
+++ b/csrc/ops.h
@ -239,11 +239,6 @@ void cutlass_moe_mm(
    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
    bool per_act_token, bool per_out_ch);
-void cutlass_blockwise_scaled_grouped_mm(
-    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
-    const torch::Tensor& scales_a, const torch::Tensor& scales_b,
-    const torch::Tensor& problem_sizes, const torch::Tensor& expert_offsets);
 void cutlass_fp4_group_mm(
    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
    const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
--- a/csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu
+++ b/csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu
@ -1,3 +1,5 @@
+#include "core/registration.h"
 #include <torch/all.h>
 #include <cutlass/arch/arch.h>
@ -365,3 +367,8 @@ void cutlass_blockwise_scaled_grouped_mm(
  }
 #endif
 }
+TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
+  m.impl("cutlass_blockwise_scaled_grouped_mm",
+         &cutlass_blockwise_scaled_grouped_mm);
+}
--- a/csrc/torch_bindings.cpp
+++ b/csrc/torch_bindings.cpp
@ -399,8 +399,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
      "Tensor scales_a, Tensor scales_b, "
      "Tensor problem_sizes, Tensor expert_offsets) -> ()",
      {stride_tag});
-  ops.impl("cutlass_blockwise_scaled_grouped_mm", torch::kCUDA,
+  // conditionally compiled so impl registration is in source file
-           &cutlass_blockwise_scaled_grouped_mm);
  // cutlass nvfp4 block scaled group GEMM
  ops.def(