mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-02 16:04:28 +08:00
[Bugfix] Fix illegal memory access (IMA) in certain cases of the MoE Marlin kernel (#28619)
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
This commit is contained in:
parent
77740191de
commit
a67dec7cba
@@ -489,14 +489,16 @@ __global__ void Marlin(
|
|||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
int idx = tid4 * 4 + i;
|
int idx = tid4 * 4 + i;
|
||||||
idx = idx < block_num_valid_tokens ? idx : 0;
|
if (idx < block_num_valid_tokens) {
|
||||||
if constexpr (w_type == vllm::kFE2M1f && s_type == vllm::kFE4M3fn) {
|
if constexpr (w_type == vllm::kFE2M1f && s_type == vllm::kFE4M3fn) {
|
||||||
sh_block_topk_weights[idx] = __hmul2(
|
sh_block_topk_weights[idx] =
|
||||||
global_scale, Dtype::num2num2(Dtype::float2num(
|
__hmul2(global_scale,
|
||||||
topk_weights_ptr[sh_block_sorted_ids[idx]])));
|
Dtype::num2num2(Dtype::float2num(
|
||||||
} else {
|
topk_weights_ptr[sh_block_sorted_ids[idx]])));
|
||||||
sh_block_topk_weights[idx] = Dtype::num2num2(
|
} else {
|
||||||
Dtype::float2num(topk_weights_ptr[sh_block_sorted_ids[idx]]));
|
sh_block_topk_weights[idx] = Dtype::num2num2(
|
||||||
|
Dtype::float2num(topk_weights_ptr[sh_block_sorted_ids[idx]]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ class SharedFusedMoE(FusedMoE):
|
|||||||
# TODO(wentao): find the root cause and remove this condition
|
# TODO(wentao): find the root cause and remove this condition
|
||||||
self.enable_eplb
|
self.enable_eplb
|
||||||
or (self.moe_config.use_flashinfer_cutlass_kernels and self.dp_size > 1)
|
or (self.moe_config.use_flashinfer_cutlass_kernels and self.dp_size > 1)
|
||||||
or self.use_marlin_kernels
|
|
||||||
)
|
)
|
||||||
and self._shared_experts is not None
|
and self._shared_experts is not None
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user