mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-23 13:17:04 +08:00
Update fused_marlin_moe_fake
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
d643d6a418
commit
e8c78d992c
@@ -212,6 +212,8 @@ def fused_marlin_moe(hidden_states: torch.Tensor,
|
||||
def fused_marlin_moe_fake(hidden_states: torch.Tensor,
|
||||
w1: torch.Tensor,
|
||||
w2: torch.Tensor,
|
||||
bias1: Optional[torch.Tensor],
|
||||
bias2: Optional[torch.Tensor],
|
||||
w1_scale: torch.Tensor,
|
||||
w2_scale: torch.Tensor,
|
||||
gating_output: torch.Tensor,
|
||||
@@ -220,9 +222,10 @@ def fused_marlin_moe_fake(hidden_states: torch.Tensor,
|
||||
quant_type_id: int,
|
||||
apply_router_weight_on_input: bool = False,
|
||||
global_num_experts: int = -1,
|
||||
activation: Optional[str] = "silu",
|
||||
expert_map: Optional[torch.Tensor] = None,
|
||||
global_scale1: Optional[torch.Tensor] = None,
|
||||
global_scale2: Optional[torch.Tensor] = None,
|
||||
expert_map: Optional[torch.Tensor] = None,
|
||||
g_idx1: Optional[torch.Tensor] = None,
|
||||
g_idx2: Optional[torch.Tensor] = None,
|
||||
sort_indices1: Optional[torch.Tensor] = None,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user