mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 10:09:08 +08:00
[BugFix] Fix: ImportError when building on hopper systems (#20513)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
parent
432870829d
commit
40b86aa05e
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
@ -16,7 +16,7 @@
|
|||||||
/vllm/lora @jeejeelee
|
/vllm/lora @jeejeelee
|
||||||
/vllm/reasoning @aarnphm
|
/vllm/reasoning @aarnphm
|
||||||
/vllm/entrypoints @aarnphm
|
/vllm/entrypoints @aarnphm
|
||||||
CMakeLists.txt @tlrmchlsmth
|
CMakeLists.txt @tlrmchlsmth @LucasWilkinson
|
||||||
|
|
||||||
# Any change to the VllmConfig changes can have a large user-facing impact,
|
# Any change to the VllmConfig changes can have a large user-facing impact,
|
||||||
# so spam a lot of people
|
# so spam a lot of people
|
||||||
|
|||||||
@ -239,11 +239,6 @@ void cutlass_moe_mm(
|
|||||||
torch::Tensor const& b_strides, torch::Tensor const& c_strides,
|
torch::Tensor const& b_strides, torch::Tensor const& c_strides,
|
||||||
bool per_act_token, bool per_out_ch);
|
bool per_act_token, bool per_out_ch);
|
||||||
|
|
||||||
void cutlass_blockwise_scaled_grouped_mm(
|
|
||||||
torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
|
|
||||||
const torch::Tensor& scales_a, const torch::Tensor& scales_b,
|
|
||||||
const torch::Tensor& problem_sizes, const torch::Tensor& expert_offsets);
|
|
||||||
|
|
||||||
void cutlass_fp4_group_mm(
|
void cutlass_fp4_group_mm(
|
||||||
torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
|
torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
|
||||||
const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
|
const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
|
||||||
|
|||||||
@ -1,3 +1,5 @@
|
|||||||
|
#include "core/registration.h"
|
||||||
|
|
||||||
#include <torch/all.h>
|
#include <torch/all.h>
|
||||||
#include <cutlass/arch/arch.h>
|
#include <cutlass/arch/arch.h>
|
||||||
|
|
||||||
@ -364,4 +366,9 @@ void cutlass_blockwise_scaled_grouped_mm(
|
|||||||
TORCH_CHECK(false, "Unsupported output tensor type");
|
TORCH_CHECK(false, "Unsupported output tensor type");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
|
||||||
|
m.impl("cutlass_blockwise_scaled_grouped_mm",
|
||||||
|
&cutlass_blockwise_scaled_grouped_mm);
|
||||||
|
}
|
||||||
|
|||||||
@ -399,8 +399,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
|
|||||||
"Tensor scales_a, Tensor scales_b, "
|
"Tensor scales_a, Tensor scales_b, "
|
||||||
"Tensor problem_sizes, Tensor expert_offsets) -> ()",
|
"Tensor problem_sizes, Tensor expert_offsets) -> ()",
|
||||||
{stride_tag});
|
{stride_tag});
|
||||||
ops.impl("cutlass_blockwise_scaled_grouped_mm", torch::kCUDA,
|
// conditionally compiled so impl registration is in source file
|
||||||
&cutlass_blockwise_scaled_grouped_mm);
|
|
||||||
|
|
||||||
// cutlass nvfp4 block scaled group GEMM
|
// cutlass nvfp4 block scaled group GEMM
|
||||||
ops.def(
|
ops.def(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user