[BugFix] Fix: ImportError when building on hopper systems (#20513)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
Lucas Wilkinson 2025-07-06 00:17:30 -04:00 committed by GitHub
parent 432870829d
commit 40b86aa05e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 10 additions and 9 deletions

2
.github/CODEOWNERS vendored
View File

@ -16,7 +16,7 @@
/vllm/lora @jeejeelee
/vllm/reasoning @aarnphm
/vllm/entrypoints @aarnphm
CMakeLists.txt @tlrmchlsmth
CMakeLists.txt @tlrmchlsmth @LucasWilkinson
# Any change to the VllmConfig changes can have a large user-facing impact,
# so spam a lot of people

View File

@ -239,11 +239,6 @@ void cutlass_moe_mm(
torch::Tensor const& b_strides, torch::Tensor const& c_strides,
bool per_act_token, bool per_out_ch);
void cutlass_blockwise_scaled_grouped_mm(
torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
const torch::Tensor& scales_a, const torch::Tensor& scales_b,
const torch::Tensor& problem_sizes, const torch::Tensor& expert_offsets);
void cutlass_fp4_group_mm(
torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,

View File

@ -1,3 +1,5 @@
#include "core/registration.h"
#include <torch/all.h>
#include <cutlass/arch/arch.h>
@ -364,4 +366,9 @@ void cutlass_blockwise_scaled_grouped_mm(
TORCH_CHECK(false, "Unsupported output tensor type");
}
#endif
}
}
TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
m.impl("cutlass_blockwise_scaled_grouped_mm",
&cutlass_blockwise_scaled_grouped_mm);
}

View File

@ -399,8 +399,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
"Tensor scales_a, Tensor scales_b, "
"Tensor problem_sizes, Tensor expert_offsets) -> ()",
{stride_tag});
ops.impl("cutlass_blockwise_scaled_grouped_mm", torch::kCUDA,
&cutlass_blockwise_scaled_grouped_mm);
// conditionally compiled so impl registration is in source file
// cutlass nvfp4 block scaled group GEMM
ops.def(