diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index bb770ecf03383..25979d5502b07 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -1170,9 +1170,13 @@ class OpenAIServingChat(OpenAIServing): ) # Send the finish response for each request.n only once + # In OpenAI's API, when a tool is called, the + # finish_reason is: + # "tool_calls" for "auto" or "required" tool calls, + # and "stop" for named tool calls. if ( auto_tools_called - or tools_streamed[i] + or (tools_streamed[i] and not tool_choice_function_name) or (self.use_harmony and harmony_tools_streamed[i]) ): finish_reason_ = "tool_calls" @@ -1523,18 +1527,24 @@ class OpenAIServingChat(OpenAIServing): message = ChatMessage( role=role, reasoning_content=reasoning_content, content=content ) + # In OpenAI's API, when a tool is called, the finish_reason is: + # "tool_calls" for "auto" or "required" tool calls, + # and "stop" for named tool calls. + is_finish_reason_tool_calls = auto_tools_called or ( + request.tool_choice + and request.tool_choice == "required" + and output.finish_reason == "stop" + ) choice_data = ChatCompletionResponseChoice( index=output.index, message=message, logprobs=logprobs, - finish_reason=( - "tool_calls" - if auto_tools_called - else output.finish_reason - if output.finish_reason - else "stop" - ), + finish_reason="tool_calls" + if is_finish_reason_tool_calls + else output.finish_reason + if output.finish_reason + else "stop", stop_reason=output.stop_reason, token_ids=( as_list(output.token_ids) if request.return_token_ids else None