diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 83a92a98026e..5c11836fbff4 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -1111,7 +1111,8 @@ class OpenAIServingChat(OpenAIServing): return_as_token_id is not None else self.return_tokens_as_token_ids for i, token_id in enumerate(token_ids): step_top_logprobs = top_logprobs[i] - if step_top_logprobs is None: + if step_top_logprobs is None or step_top_logprobs.get( + token_id) is None: token = tokenizer.decode(token_id) if should_return_as_token_id: token = f"token_id:{token_id}"