[Bug] Fix Assertion error DeepEP/csrc/kernels/intranode.cu:928: 'false and Unsupported type' (#26532)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2026-03-16 13:57:12 +08:00 · 2025-10-13 18:26:37 -04:00 · 2025-10-13 18:26:37 -04:00 · 7200a21cd1
commit 7200a21cd1
parent 577c72a227
2 changed files with 5 additions and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py
@@ -336,7 +336,11 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
                apply_router_weight_on_input=apply_router_weight_on_input,
            )
        dbo_yield_and_switch_from_compute_to_comm()
+        assert fused_expert_output.dtype == torch.bfloat16, (
+            f"Expected fused_expert_output bfloat16, got {fused_expert_output.dtype}"
+        )
        combined_x, _, event = self.buffer.combine(
+            # HT combine only supports BF16
            x=fused_expert_output,
            handle=handle,
            topk_weights=None,
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -984,7 +984,7 @@ class FusedMoEModularKernel(torch.nn.Module):
            assert num_chunks == 0
            workspace13 = None
            workspace2 = None
-            fused_out = torch.empty_like(a1q)
+            fused_out = torch.empty_like(a1q, dtype=in_dtype)
        else:
            assert num_chunks > 0
            workspace13, workspace2, fused_out = self._allocate_buffers(