From e15932bb60e645e533a4b2f999bec9c60328e6d3 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Mon, 4 Sep 2023 08:50:55 -0700 Subject: [PATCH] Only emit warning about internal tokenizer if it isn't being used (#939) --- vllm/transformers_utils/tokenizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index fbe430bdf5d84..4eb941d3c193b 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -25,7 +25,8 @@ def get_tokenizer( "Cannot use the fast tokenizer in slow tokenizer mode.") kwargs["use_fast"] = False - if "llama" in tokenizer_name.lower() and kwargs.get("use_fast", True): + if ("llama" in tokenizer_name.lower() and kwargs.get("use_fast", True) + and tokenizer_name != _FAST_LLAMA_TOKENIZER): logger.info( "For some LLaMA-based models, initializing the fast tokenizer may " "take a long time. To eliminate the initialization time, consider "