[BugFix] Fix incremental detokenization perf issue (#16963)

Signed-off-by: Nick Hill <nhill@redhat.com>
2026-01-21 01:24:28 +08:00 · 2025-04-22 01:16:19 -07:00 · 2025-04-22 01:16:19 -07:00 · e4d6144232
commit e4d6144232
parent 8d32dc603d
1 changed file with 1 addition and 1 deletion
--- a/vllm/v1/engine/detokenizer.py
+++ b/vllm/v1/engine/detokenizer.py
@@ -161,7 +161,7 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer):
        prompt_suffix = request.prompt_token_ids
        prompt_len = len(prompt_suffix)
        if prompt_len > 4:
-            for i in range(4, max(prompt_len + 1, 32)):
+            for i in range(4, min(prompt_len + 1, 24)):
                suffix = request.prompt_token_ids[-i:]
                if '<EFBFBD>' not in self.tokenizer.decode(suffix):
                    prompt_suffix = suffix