From cfa134d2475096eae47a58d14a6dec4c3fba9294 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Sat, 1 Feb 2025 00:41:35 -0500 Subject: [PATCH] [Bugfix/CI] Fixup benchmark_moe.py (#12562) Fixes `is_marlin` not being passed into `get_default_config` Also allow `--tensor-parallel-size` in addition to `-tp` and `--tp-size` Signed-off-by: Tyler Michael Smith --- benchmarks/kernels/benchmark_moe.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py index 5c8bf33afebc8..068830f02fb5e 100644 --- a/benchmarks/kernels/benchmark_moe.py +++ b/benchmarks/kernels/benchmark_moe.py @@ -343,9 +343,13 @@ class BenchmarkWorker: op_config = get_moe_configs(num_experts, shard_intermediate_size // 2, dtype_str) if op_config is None: - config = get_default_config(num_tokens, num_experts, - shard_intermediate_size, hidden_size, - topk, dtype_str) + config = get_default_config(num_tokens, + num_experts, + shard_intermediate_size, + hidden_size, + topk, + dtype_str, + is_marlin=False) else: config = op_config[min(op_config.keys(), key=lambda x: abs(x - num_tokens))] @@ -536,7 +540,11 @@ if __name__ == "__main__": parser.add_argument("--model", type=str, default="mistralai/Mixtral-8x7B-Instruct-v0.1") - parser.add_argument("--tp-size", "-tp", type=int, default=2) + parser.add_argument("--tp-size", + "-tp", + "--tensor-parallel-size", + type=int, + default=2) parser.add_argument("--dtype", type=str, choices=["auto", "fp8_w8a8", "int8_w8a16"],