[Bugfix] Fix missing per_act_token parameter in compressed_tensors_moe (#20509)

Signed-off-by: Lu Fang <fanglu@fb.com>
Lucia Fang 2025-07-06 12:08:30 +08:00 committed by GitHub
parent f73d02aadc
commit 432870829d


@@ -322,7 +322,7 @@ def cutlass_moe_fp8(
     topk_ids: torch.Tensor,
     w1_scale: torch.Tensor,
     w2_scale: torch.Tensor,
-    per_act_token: bool,
+    per_act_token: Optional[bool] = None,
     activation: str = "silu",
     a1_scale: Optional[torch.Tensor] = None,
     a2_scale: Optional[torch.Tensor] = None,
@@ -366,6 +366,9 @@ def cutlass_moe_fp8(
     Returns:
     - torch.Tensor: The fp16 output tensor after applying the MoE layer.
     """
+    if per_act_token is None:
+        per_act_token = a1_scale.numel() != 1 if a1_scale is not None else (
+            a2_scale.numel() != 1 if a2_scale is not None else False)
     per_out_ch = w1_scale.numel() != w1_q.size(0)
     num_experts = global_num_experts if global_num_experts != -1 else w1_q.size(
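
Below is a minimal standalone sketch of the default-inference rule this commit adds, useful for checking the behavior in isolation. The helper name infer_per_act_token is hypothetical (not part of vLLM); it mirrors the added branch: when per_act_token is omitted, it is derived from whichever activation scale is available, on the assumption that a scale tensor with more than one element implies per-activation-token quantization.

from typing import Optional

import torch


def infer_per_act_token(per_act_token: Optional[bool],
                        a1_scale: Optional[torch.Tensor],
                        a2_scale: Optional[torch.Tensor]) -> bool:
    # Hypothetical helper mirroring the logic added in this commit:
    # prefer a1_scale, fall back to a2_scale, and default to per-tensor
    # scaling (False) when neither scale is provided.
    if per_act_token is not None:
        return per_act_token
    if a1_scale is not None:
        return a1_scale.numel() != 1
    if a2_scale is not None:
        return a2_scale.numel() != 1
    return False


# A single-element scale means one scale for the whole tensor -> per-tensor.
assert infer_per_act_token(None, torch.tensor([0.5]), None) is False
# One scale per token (here 16 tokens) -> per-activation-token quantization.
assert infer_per_act_token(None, torch.rand(16, 1), None) is True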