[Frontend] Skip unnecessary detokenization when token_id is requested (#24236)

Signed-off-by: NickLucche <nlucches@redhat.com>
2026-03-16 14:07:13 +08:00 · 2025-09-05 01:04:12 +02:00 · 2025-09-05 01:04:12 +02:00 · 65e038931d
commit 65e038931d
parent 886ccbe5ba
1 changed file with 2 additions and 1 deletion
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -1419,9 +1419,10 @@ class OpenAIServingChat(OpenAIServing):
            step_top_logprobs = top_logprobs[i]
            if step_top_logprobs is None or step_top_logprobs.get(
                    token_id) is None:
-                token = tokenizer.decode(token_id)
                if should_return_as_token_id:
                    token = f"token_id:{token_id}"
+                else:
+                    token = tokenizer.decode(token_id)

                logprobs_content.append(
                    ChatCompletionLogProbsContent(