[Frontend] Skip unnecessary detokenization when token_id is requested (#24236)

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
Nicolò Lucchesi 2025-09-05 01:04:12 +02:00 committed by GitHub
parent 886ccbe5ba
commit 65e038931d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1419,9 +1419,10 @@ class OpenAIServingChat(OpenAIServing):
step_top_logprobs = top_logprobs[i]
if step_top_logprobs is None or step_top_logprobs.get(
token_id) is None:
-token = tokenizer.decode(token_id)
if should_return_as_token_id:
token = f"token_id:{token_id}"
+else:
+token = tokenizer.decode(token_id)
logprobs_content.append(
ChatCompletionLogProbsContent(