mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 23:55:44 +08:00
Only turn on FastIncrementalDetokenizer when tokenizers >= 0.21.1 (#17158)
This commit is contained in:
parent
ef19e67d2c
commit
fc966e9cc6
@@ -2,6 +2,8 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
import tokenizers
|
||||||
|
from packaging import version
|
||||||
from tokenizers import Tokenizer
|
from tokenizers import Tokenizer
|
||||||
from tokenizers.decoders import DecodeStream
|
from tokenizers.decoders import DecodeStream
|
||||||
from transformers import PreTrainedTokenizerFast
|
from transformers import PreTrainedTokenizerFast
|
||||||
@@ -43,8 +45,10 @@ class IncrementalDetokenizer:
|
|||||||
# No tokenizer => skipping detokenization.
|
# No tokenizer => skipping detokenization.
|
||||||
return IncrementalDetokenizer()
|
return IncrementalDetokenizer()
|
||||||
|
|
||||||
if isinstance(tokenizer, PreTrainedTokenizerFast):
|
if (isinstance(tokenizer, PreTrainedTokenizerFast) and version.parse(
|
||||||
|
tokenizers.__version__) >= version.parse("0.21.1")):
|
||||||
# Fast tokenizer => use tokenizers library DecodeStream.
|
# Fast tokenizer => use tokenizers library DecodeStream.
|
||||||
|
# And only tokenizers >= 0.21.1 supports Fast Detokenizer.
|
||||||
return FastIncrementalDetokenizer(tokenizer, request)
|
return FastIncrementalDetokenizer(tokenizer, request)
|
||||||
|
|
||||||
# Fall back to slow python-based incremental detokenization.
|
# Fall back to slow python-based incremental detokenization.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user