From 65e038931d8599dd9ab80ca5b53d5573d5b74fd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?=
Date: Fri, 5 Sep 2025 01:04:12 +0200
Subject: [PATCH] [Frontend] Skip unnecessary detokenization when token_id is
 requested (#24236)

Signed-off-by: NickLucche
---
 vllm/entrypoints/openai/serving_chat.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 35edd2f85cd07..fff6dcd724ad6 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -1419,9 +1419,10 @@ class OpenAIServingChat(OpenAIServing):
             step_top_logprobs = top_logprobs[i]
             if step_top_logprobs is None or step_top_logprobs.get(
                     token_id) is None:
-                token = tokenizer.decode(token_id)
                 if should_return_as_token_id:
                     token = f"token_id:{token_id}"
+                else:
+                    token = tokenizer.decode(token_id)
                 logprobs_content.append(
                     ChatCompletionLogProbsContent(
                         token=token,
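
Note: a minimal sketch of the control-flow change, not code from this patch.
FakeTokenizer and render_token below are hypothetical stand-ins. Before this
change, tokenizer.decode() ran unconditionally and its result was discarded
whenever the client asked for raw token IDs; afterwards, decode() is reached
only on the else branch.

# Hypothetical stand-in for a real tokenizer; decode() prints so the
# demo makes the (skipped) detokenization work visible.
class FakeTokenizer:
    def decode(self, token_id: int) -> str:
        print(f"decoding {token_id}")
        return f"<tok{token_id}>"


def render_token(token_id: int, tokenizer: FakeTokenizer,
                 should_return_as_token_id: bool) -> str:
    # Patched ordering: check the token-id request first, so decode()
    # is only invoked when a textual token is actually needed.
    if should_return_as_token_id:
        return f"token_id:{token_id}"
    return tokenizer.decode(token_id)


tok = FakeTokenizer()
assert render_token(42, tok, True) == "token_id:42"   # no decode call
assert render_token(42, tok, False) == "<tok42>"      # decode runs here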