diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index fbe430bdf5d8..4eb941d3c193 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -25,7 +25,8 @@ def get_tokenizer( "Cannot use the fast tokenizer in slow tokenizer mode.") kwargs["use_fast"] = False - if "llama" in tokenizer_name.lower() and kwargs.get("use_fast", True): + if ("llama" in tokenizer_name.lower() and kwargs.get("use_fast", True) + and tokenizer_name != _FAST_LLAMA_TOKENIZER): logger.info( "For some LLaMA-based models, initializing the fast tokenizer may " "take a long time. To eliminate the initialization time, consider "