diff --git a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py index cc51ecdbacdef..ac8d76d2c2175 100644 --- a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py @@ -212,6 +212,8 @@ def fused_marlin_moe(hidden_states: torch.Tensor, def fused_marlin_moe_fake(hidden_states: torch.Tensor, w1: torch.Tensor, w2: torch.Tensor, + bias1: Optional[torch.Tensor], + bias2: Optional[torch.Tensor], w1_scale: torch.Tensor, w2_scale: torch.Tensor, gating_output: torch.Tensor, @@ -220,9 +222,10 @@ def fused_marlin_moe_fake(hidden_states: torch.Tensor, quant_type_id: int, apply_router_weight_on_input: bool = False, global_num_experts: int = -1, + activation: Optional[str] = "silu", + expert_map: Optional[torch.Tensor] = None, global_scale1: Optional[torch.Tensor] = None, global_scale2: Optional[torch.Tensor] = None, - expert_map: Optional[torch.Tensor] = None, g_idx1: Optional[torch.Tensor] = None, g_idx2: Optional[torch.Tensor] = None, sort_indices1: Optional[torch.Tensor] = None,