diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index a2d5fb576390..888aa4eb6fa8 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -1372,6 +1372,9 @@ class OpenAIServingChat(OpenAIServing): else "stop" ), stop_reason=output.stop_reason, + token_ids=( + as_list(output.token_ids) if request.return_token_ids else None + ), ) choices.append(choice_data) continue