mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 04:54:56 +08:00
[Bug] Fix Assertion error DeepEP/csrc/kernels/intranode.cu:928: 'false and Unsupported type' (#26532)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
577c72a227
commit
7200a21cd1
@@ -336,7 +336,11 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
|
||||
apply_router_weight_on_input=apply_router_weight_on_input,
|
||||
)
|
||||
dbo_yield_and_switch_from_compute_to_comm()
|
||||
+ assert fused_expert_output.dtype == torch.bfloat16, (
|
||||
+ f"Expected fused_expert_output bfloat16, got {fused_expert_output.dtype}"
|
||||
+ )
|
||||
combined_x, _, event = self.buffer.combine(
|
||||
+ # HT combine only supports BF16
|
||||
x=fused_expert_output,
|
||||
handle=handle,
|
||||
topk_weights=None,
|
||||
|
||||
@@ -984,7 +984,7 @@ class FusedMoEModularKernel(torch.nn.Module):
|
||||
assert num_chunks == 0
|
||||
workspace13 = None
|
||||
workspace2 = None
|
||||
- fused_out = torch.empty_like(a1q)
|
||||
+ fused_out = torch.empty_like(a1q, dtype=in_dtype)
|
||||
else:
|
||||
assert num_chunks > 0
|
||||
workspace13, workspace2, fused_out = self._allocate_buffers(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user