Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-06-12 04:37:12 +08:00)
[Compile] Fix Compile Warning SM100 Cutlass MLA (#23287)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent 3663870c72
commit f94bf9b924
@@ -167,7 +167,7 @@ typename T::Fmha::Arguments args_from_options(
|
|||||||
// TODO(trevor-m): Change split_kv back to -1 when
|
// TODO(trevor-m): Change split_kv back to -1 when
|
||||||
// https://github.com/NVIDIA/cutlass/issues/2274 is fixed. Split_kv=1 will
|
// https://github.com/NVIDIA/cutlass/issues/2274 is fixed. Split_kv=1 will
|
||||||
// perform worse with larger context length and smaller batch sizes.
|
// perform worse with larger context length and smaller batch sizes.
|
||||||
num_kv_splits, // split_kv
|
static_cast<int>(num_kv_splits), // split_kv
|
||||||
nullptr, // is_var_split_kv
|
nullptr, // is_var_split_kv
|
||||||
};
|
};
|
||||||
// TODO(kaixih@nvidia): When split_kv=-1 and is_var_split_kv=false, we compute
|
// TODO(kaixih@nvidia): When split_kv=-1 and is_var_split_kv=false, we compute
|
||||||
@@ -264,7 +264,7 @@ int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_ba
|
|||||||
// Assumes device 0 when getting sm_count.
|
// Assumes device 0 when getting sm_count.
|
||||||
arguments.hw_info.sm_count =
|
arguments.hw_info.sm_count =
|
||||||
sm_count <= 0 ? cutlass::KernelHardwareInfo::query_device_multiprocessor_count(/*device_id=*/0) : sm_count;
|
sm_count <= 0 ? cutlass::KernelHardwareInfo::query_device_multiprocessor_count(/*device_id=*/0) : sm_count;
|
||||||
arguments.split_kv = num_kv_splits;
|
arguments.split_kv = static_cast<int>(num_kv_splits);
|
||||||
MlaSm100Type::Fmha::set_split_kv(arguments);
|
MlaSm100Type::Fmha::set_split_kv(arguments);
|
||||||
|
|
||||||
return MlaSm100Type::Fmha::get_workspace_size(arguments);
|
return MlaSm100Type::Fmha::get_workspace_size(arguments);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user