mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 16:22:17 +08:00
[Frontend] Skip unnecessary detokenization when token_id is requested (#24236)
Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
parent
886ccbe5ba
commit
65e038931d
@@ -1419,9 +1419,10 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
step_top_logprobs = top_logprobs[i]
|
step_top_logprobs = top_logprobs[i]
|
||||||
if step_top_logprobs is None or step_top_logprobs.get(
|
if step_top_logprobs is None or step_top_logprobs.get(
|
||||||
token_id) is None:
|
token_id) is None:
|
||||||
token = tokenizer.decode(token_id)
|
|
||||||
if should_return_as_token_id:
|
if should_return_as_token_id:
|
||||||
token = f"token_id:{token_id}"
|
token = f"token_id:{token_id}"
|
||||||
|
else:
|
||||||
|
token = tokenizer.decode(token_id)
|
||||||
|
|
||||||
logprobs_content.append(
|
logprobs_content.append(
|
||||||
ChatCompletionLogProbsContent(
|
ChatCompletionLogProbsContent(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user