From e8c78d992c789b03518ad69d3b21601181f8444a Mon Sep 17 00:00:00 2001
From: mgoin
Date: Thu, 18 Sep 2025 11:43:58 -0700
Subject: [PATCH] Update fused_marlin_moe_fake

Signed-off-by: mgoin
---
 vllm/model_executor/layers/fused_moe/fused_marlin_moe.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
index cc51ecdbacdef..ac8d76d2c2175 100644
--- a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
@@ -212,6 +212,8 @@ def fused_marlin_moe(hidden_states: torch.Tensor,
 def fused_marlin_moe_fake(hidden_states: torch.Tensor,
                           w1: torch.Tensor,
                           w2: torch.Tensor,
+                          bias1: Optional[torch.Tensor],
+                          bias2: Optional[torch.Tensor],
                           w1_scale: torch.Tensor,
                           w2_scale: torch.Tensor,
                           gating_output: torch.Tensor,
@@ -220,9 +222,10 @@ def fused_marlin_moe_fake(hidden_states: torch.Tensor,
                           quant_type_id: int,
                           apply_router_weight_on_input: bool = False,
                           global_num_experts: int = -1,
+                          activation: Optional[str] = "silu",
+                          expert_map: Optional[torch.Tensor] = None,
                           global_scale1: Optional[torch.Tensor] = None,
                           global_scale2: Optional[torch.Tensor] = None,
-                          expert_map: Optional[torch.Tensor] = None,
                           g_idx1: Optional[torch.Tensor] = None,
                           g_idx2: Optional[torch.Tensor] = None,
                           sort_indices1: Optional[torch.Tensor] = None,