Only turn on FastIncrementalDetokenizer when tokenizers >= 0.21.1 (#17158)

This commit is contained in:
Lu Fang 2025-04-25 02:10:32 -07:00 committed by GitHub
parent ef19e67d2c
commit fc966e9cc6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,6 +2,8 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Optional from typing import Optional
import tokenizers
from packaging import version
from tokenizers import Tokenizer from tokenizers import Tokenizer
from tokenizers.decoders import DecodeStream from tokenizers.decoders import DecodeStream
from transformers import PreTrainedTokenizerFast from transformers import PreTrainedTokenizerFast
@ -43,8 +45,10 @@ class IncrementalDetokenizer:
# No tokenizer => skipping detokenization. # No tokenizer => skipping detokenization.
return IncrementalDetokenizer() return IncrementalDetokenizer()
if isinstance(tokenizer, PreTrainedTokenizerFast): if (isinstance(tokenizer, PreTrainedTokenizerFast) and version.parse(
tokenizers.__version__) >= version.parse("0.21.1")):
# Fast tokenizer => use tokenizers library DecodeStream. # Fast tokenizer => use tokenizers library DecodeStream.
# FastIncrementalDetokenizer requires tokenizers >= 0.21.1.
return FastIncrementalDetokenizer(tokenizer, request) return FastIncrementalDetokenizer(tokenizer, request)
# Fall back to slow python-based incremental detokenization. # Fall back to slow python-based incremental detokenization.