diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py index 5c8bf33afebc8..068830f02fb5e 100644 --- a/benchmarks/kernels/benchmark_moe.py +++ b/benchmarks/kernels/benchmark_moe.py @@ -343,9 +343,13 @@ class BenchmarkWorker: op_config = get_moe_configs(num_experts, shard_intermediate_size // 2, dtype_str) if op_config is None: - config = get_default_config(num_tokens, num_experts, - shard_intermediate_size, hidden_size, - topk, dtype_str) + config = get_default_config(num_tokens, + num_experts, + shard_intermediate_size, + hidden_size, + topk, + dtype_str, + is_marlin=False) else: config = op_config[min(op_config.keys(), key=lambda x: abs(x - num_tokens))] @@ -536,7 +540,11 @@ if __name__ == "__main__": parser.add_argument("--model", type=str, default="mistralai/Mixtral-8x7B-Instruct-v0.1") - parser.add_argument("--tp-size", "-tp", type=int, default=2) + parser.add_argument("--tp-size", + "-tp", + "--tensor-parallel-size", + type=int, + default=2) parser.add_argument("--dtype", type=str, choices=["auto", "fp8_w8a8", "int8_w8a16"],