From 470ad118b6238e66094c9a508dea0aaaaf864093 Mon Sep 17 00:00:00 2001 From: Sungyoon Jeong <157349761+n0gu-furiosa@users.noreply.github.com> Date: Mon, 3 Nov 2025 13:21:18 +0900 Subject: [PATCH] [Frontend] Align finish_reason when tool is called with OpenAI (#25054) Signed-off-by: Sungyoon Jeong Co-authored-by: Chauncey --- vllm/entrypoints/openai/serving_chat.py | 26 +++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index bb770ecf03383..25979d5502b07 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -1170,9 +1170,13 @@ class OpenAIServingChat(OpenAIServing): ) # Send the finish response for each request.n only once + # In OpenAI's API, when a tool is called, the + # finish_reason is: + # "tool_calls" for "auto" or "required" tool calls, + # and "stop" for named tool calls. if ( auto_tools_called - or tools_streamed[i] + or (tools_streamed[i] and not tool_choice_function_name) or (self.use_harmony and harmony_tools_streamed[i]) ): finish_reason_ = "tool_calls" @@ -1523,18 +1527,24 @@ class OpenAIServingChat(OpenAIServing): message = ChatMessage( role=role, reasoning_content=reasoning_content, content=content ) + # In OpenAI's API, when a tool is called, the finish_reason is: + # "tool_calls" for "auto" or "required" tool calls, + # and "stop" for named tool calls. + is_finish_reason_tool_calls = auto_tools_called or ( + request.tool_choice + and request.tool_choice == "required" + and output.finish_reason == "stop" + ) choice_data = ChatCompletionResponseChoice( index=output.index, message=message, logprobs=logprobs, - finish_reason=( - "tool_calls" - if auto_tools_called - else output.finish_reason - if output.finish_reason - else "stop" - ), + finish_reason="tool_calls" + if is_finish_reason_tool_calls + else output.finish_reason + if output.finish_reason + else "stop", stop_reason=output.stop_reason, token_ids=( as_list(output.token_ids) if request.return_token_ids else None