From c6db21313cef3f1f9aa5714efba0d60b5f1a8dad Mon Sep 17 00:00:00 2001 From: elijah <30852919+e1ijah1@users.noreply.github.com> Date: Mon, 13 Jan 2025 23:22:07 +0800 Subject: [PATCH] bugfix: Fix signature mismatch in benchmark's `get_tokenizer` function (#11982) Signed-off-by: elijah --- benchmarks/backend_request_func.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index b67849038cf0..9d71e4ecc4a3 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -417,14 +417,35 @@ def get_model(pretrained_model_name_or_path: str) -> str: def get_tokenizer( - pretrained_model_name_or_path: str, trust_remote_code: bool + pretrained_model_name_or_path: str, + tokenizer_mode: str = "auto", + trust_remote_code: bool = False, + **kwargs, ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]: if pretrained_model_name_or_path is not None and not os.path.exists( pretrained_model_name_or_path): pretrained_model_name_or_path = get_model( pretrained_model_name_or_path) - return AutoTokenizer.from_pretrained(pretrained_model_name_or_path, - trust_remote_code=trust_remote_code) + if tokenizer_mode == "slow": + if kwargs.get("use_fast", False): + raise ValueError( + "Cannot use the fast tokenizer in slow tokenizer mode.") + kwargs["use_fast"] = False + if tokenizer_mode == "mistral": + try: + from vllm.transformers_utils.tokenizer import MistralTokenizer + except ImportError as e: + raise ImportError("MistralTokenizer requires vllm package.\n" + "Please install it with `pip install vllm` " + "to use mistral tokenizer mode.") from e + return MistralTokenizer.from_pretrained( + str(pretrained_model_name_or_path)) + else: + return AutoTokenizer.from_pretrained( + pretrained_model_name_or_path, + trust_remote_code=trust_remote_code, + **kwargs, + ) ASYNC_REQUEST_FUNCS = {