From fc966e9cc6a6e01f642c721514de008a06a57203 Mon Sep 17 00:00:00 2001 From: Lu Fang <30275821+houseroad@users.noreply.github.com> Date: Fri, 25 Apr 2025 02:10:32 -0700 Subject: [PATCH] Only turn on FastIncrementalDetokenizer when tokenizers >= 0.21.1 (#17158) --- vllm/v1/engine/detokenizer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py index 330a3f6dad90..dca327cc5d07 100644 --- a/vllm/v1/engine/detokenizer.py +++ b/vllm/v1/engine/detokenizer.py @@ -2,6 +2,8 @@ from abc import ABC, abstractmethod from typing import Optional +import tokenizers +from packaging import version from tokenizers import Tokenizer from tokenizers.decoders import DecodeStream from transformers import PreTrainedTokenizerFast @@ -43,8 +45,10 @@ class IncrementalDetokenizer: # No tokenizer => skipping detokenization. return IncrementalDetokenizer() - if isinstance(tokenizer, PreTrainedTokenizerFast): + if (isinstance(tokenizer, PreTrainedTokenizerFast) and version.parse( + tokenizers.__version__) >= version.parse("0.21.1")): # Fast tokenizer => use tokenizers library DecodeStream. + # FastIncrementalDetokenizer requires tokenizers >= 0.21.1. return FastIncrementalDetokenizer(tokenizer, request) # Fall back to slow python-based incremental detokenization.