From e4d614423283d8beae4a92aeb537a49ad2662864 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Tue, 22 Apr 2025 01:16:19 -0700 Subject: [PATCH] [BugFix] Fix incremental detokenization perf issue (#16963) Signed-off-by: Nick Hill --- vllm/v1/engine/detokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py index 006d53d8f1288..330a3f6dad90e 100644 --- a/vllm/v1/engine/detokenizer.py +++ b/vllm/v1/engine/detokenizer.py @@ -161,7 +161,7 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer): prompt_suffix = request.prompt_token_ids prompt_len = len(prompt_suffix) if prompt_len > 4: - for i in range(4, max(prompt_len + 1, 32)): + for i in range(4, min(prompt_len + 1, 24)): suffix = request.prompt_token_ids[-i:] if '�' not in self.tokenizer.decode(suffix): prompt_suffix = suffix