From 470ad118b6238e66094c9a508dea0aaaaf864093 Mon Sep 17 00:00:00 2001
From: Sungyoon Jeong <157349761+n0gu-furiosa@users.noreply.github.com>
Date: Mon, 3 Nov 2025 13:21:18 +0900
Subject: [PATCH] [Frontend] Align finish_reason when tool is called with
 OpenAI (#25054)

Signed-off-by: Sungyoon Jeong <sungyoon.jeong@furiosa.ai>
Co-authored-by: Chauncey <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_chat.py | 26 +++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index bb770ecf03383..25979d5502b07 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -1170,9 +1170,13 @@ class OpenAIServingChat(OpenAIServing):
                             )
 
                         # Send the finish response for each request.n only once
+                        # In OpenAI's API, when a tool is called, the
+                        # finish_reason is "tool_calls" when tool_choice is
+                        # "auto" or "required", and "stop" when tool_choice
+                        # names a specific function.
                         if (
                             auto_tools_called
-                            or tools_streamed[i]
+                            or (tools_streamed[i] and not tool_choice_function_name)
                             or (self.use_harmony and harmony_tools_streamed[i])
                         ):
                             finish_reason_ = "tool_calls"
@@ -1523,18 +1527,24 @@ class OpenAIServingChat(OpenAIServing):
                 message = ChatMessage(
                     role=role, reasoning_content=reasoning_content, content=content
                 )
+            # In OpenAI's API, when a tool is called, the finish_reason is
+            # "tool_calls" when tool_choice is "auto" or "required", and
+            # "stop" when tool_choice names a specific function.
+            is_finish_reason_tool_calls = auto_tools_called or (
+                request.tool_choice
+                and request.tool_choice == "required"
+                and output.finish_reason == "stop"
+            )
 
             choice_data = ChatCompletionResponseChoice(
                 index=output.index,
                 message=message,
                 logprobs=logprobs,
-                finish_reason=(
-                    "tool_calls"
-                    if auto_tools_called
-                    else output.finish_reason
-                    if output.finish_reason
-                    else "stop"
-                ),
+                finish_reason="tool_calls"
+                if is_finish_reason_tool_calls
+                else output.finish_reason
+                if output.finish_reason
+                else "stop",
                 stop_reason=output.stop_reason,
                 token_ids=(
                     as_list(output.token_ids) if request.return_token_ids else None