From 238c0d93b40008244fae64530d82f1860b1f9121 Mon Sep 17 00:00:00 2001
From: Alexander Matveev <59768536+alexm-neuralmagic@users.noreply.github.com>
Date: Fri, 13 Dec 2024 11:19:10 -0500
Subject: [PATCH] [Misc] Add tokenizer_mode param to benchmark_serving.py
 (#11174)

Signed-off-by: Alexander Matveev
---
 benchmarks/benchmark_serving.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 3256692142c5e..4eb0e1f8ac903 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -781,6 +781,7 @@ def main(args: argparse.Namespace):
     backend = args.backend
     model_id = args.model
     tokenizer_id = args.tokenizer if args.tokenizer is not None else args.model
+    tokenizer_mode = args.tokenizer_mode
 
     if args.base_url is not None:
         api_url = f"{args.base_url}{args.endpoint}"
@@ -790,6 +791,7 @@ def main(args: argparse.Namespace):
         base_url = f"http://{args.host}:{args.port}"
 
     tokenizer = get_tokenizer(tokenizer_id,
+                              tokenizer_mode=tokenizer_mode,
                               trust_remote_code=args.trust_remote_code)
 
     if args.dataset is not None:
@@ -1210,5 +1212,15 @@ if __name__ == "__main__":
         "from the sampled HF dataset.",
     )
 
+    parser.add_argument(
+        '--tokenizer-mode',
+        type=str,
+        default="auto",
+        choices=['auto', 'slow', 'mistral'],
+        help='The tokenizer mode.\n\n* "auto" will use the '
+        'fast tokenizer if available.\n* "slow" will '
+        'always use the slow tokenizer. \n* '
+        '"mistral" will always use the `mistral_common` tokenizer.')
+
     args = parser.parse_args()
     main(args)
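
For context, below is a minimal, self-contained sketch of the pattern this patch follows: an argparse flag that selects the tokenizer mode and is threaded through to tokenizer loading. It is illustrative only; `load_tokenizer_sketch` is a hypothetical stand-in for vLLM's `get_tokenizer()` (which lives in `vllm.transformers_utils.tokenizer`), not the actual implementation, and the `mistral_common` path is stubbed out.

```python
# Illustrative sketch only: mirrors how a --tokenizer-mode flag like the one
# added in this patch can be passed through to tokenizer loading.
# `load_tokenizer_sketch` is a hypothetical stand-in for vLLM's get_tokenizer().
import argparse

from transformers import AutoTokenizer, PreTrainedTokenizerBase


def load_tokenizer_sketch(name: str,
                          tokenizer_mode: str = "auto",
                          trust_remote_code: bool = False
                          ) -> PreTrainedTokenizerBase:
    if tokenizer_mode == "mistral":
        # The real get_tokenizer() routes this case to a tokenizer backed by
        # the `mistral_common` package; that path is out of scope here.
        raise NotImplementedError("mistral mode is not shown in this sketch")
    # "auto" prefers the fast (Rust) tokenizer; "slow" forces the Python one.
    return AutoTokenizer.from_pretrained(
        name,
        use_fast=(tokenizer_mode == "auto"),
        trust_remote_code=trust_remote_code,
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--tokenizer-mode",
                        type=str,
                        default="auto",
                        choices=["auto", "slow", "mistral"])
    parser.add_argument("--trust-remote-code", action="store_true")
    args = parser.parse_args()

    tokenizer = load_tokenizer_sketch(args.model,
                                      tokenizer_mode=args.tokenizer_mode,
                                      trust_remote_code=args.trust_remote_code)
    print(type(tokenizer).__name__)
```

With the patch applied, the benchmark accepts the new flag alongside the existing options, e.g. `--tokenizer-mode slow` or `--tokenizer-mode mistral` together with `--model`/`--tokenizer`; the default `auto` preserves the previous behavior.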