mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-21 01:24:28 +08:00
[BugFix] Fix incremental detokenization perf issue (#16963)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
8d32dc603d
commit
e4d6144232
@@ -161,7 +161,7 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer):
|
||||
prompt_suffix = request.prompt_token_ids
|
||||
prompt_len = len(prompt_suffix)
|
||||
if prompt_len > 4:
|
||||
- for i in range(4, max(prompt_len + 1, 32)):
|
||||
+ for i in range(4, min(prompt_len + 1, 24)):
|
||||
suffix = request.prompt_token_ids[-i:]
|
||||
if '<EFBFBD>' not in self.tokenizer.decode(suffix):
|
||||
prompt_suffix = suffix
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user