diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py index 660bae3146026..e49750bc92b3b 100644 --- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py @@ -355,7 +355,7 @@ def batched_triton_kernel( def invoke_moe_batched_triton_kernel( A: torch.Tensor, # [E, max_tokens, K] - B: torch.Tensor, # [E, K, N] + B: torch.Tensor, # [E, N, K] C: torch.Tensor, # [E, max_tokens, N] expert_num_tokens: torch.Tensor, # [E] compute_type: tl.dtype,