[Bugfix] Fix grouped_topk pytorch impl when num_experts can't be grouped properly (#29439)

Signed-off-by: Divakar Verma <divakar.verma@amd.com> Co-authored-by: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Co-authored-by: TJian <tunjian.tan@embeddedllm.com>
2026-05-30 03:37:04 +08:00 · 2025-12-10 21:47:18 -06:00 · 2025-12-10 21:47:18 -06:00 · d1e1fb4363
commit d1e1fb4363
parent b51255f369
1 changed file with 9 additions and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -1556,6 +1556,14 @@ class FusedMoE(CustomOp):
                    f"EPLB is not supported for {self.quant_method.method_name}."
                )
        def valid_grouping() -> bool:
            """Return True when the experts can be partitioned into groups.

            The expert count is read from the last dimension of
            ``router_logits``. Grouping is considered valid only when that
            count is strictly larger than ``self.num_expert_group`` and is an
            exact multiple of it.
            """
            expert_count = router_logits.shape[-1]
            group_count = self.num_expert_group
            # The size comparison is evaluated first, so the divisibility
            # check is only reached when there are more experts than groups.
            return expert_count > group_count and expert_count % group_count == 0
        indices_type = self.quant_method.topk_indices_dtype
        # Check if we should use a routing simulation strategy
@@ -1570,7 +1578,7 @@ class FusedMoE(CustomOp):
            )
        # DeepSeekv2 uses grouped_top_k
-        elif self.use_grouped_topk:
+        elif self.use_grouped_topk and valid_grouping():
            assert self.topk_group is not None
            assert self.num_expert_group is not None
            if rocm_aiter_ops.is_fused_moe_enabled():