[BugFix] Fix tool call finish reason in streaming case (#9209)

Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
2025-12-21 16:45:01 +08:00 · 2024-10-11 22:24:26 -03:00 · 2024-10-11 22:24:26 -03:00 · ec10cb8511
commit ec10cb8511
parent d11b46f3a5
1 changed files with 15 additions and 11 deletions
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -538,10 +538,12 @@ class OpenAIServingChat(OpenAIServing):
                        #   any tokens that were generated but previously
                        #   matched by partial json parsing
                        # only happens if we are NOT using guided decoding
+                        auto_tools_called = False
                        if tool_parser:
-                            index = len(
+                            auto_tools_called = len(
-                                tool_parser.prev_tool_call_arr) - 1 if len(
+                                tool_parser.prev_tool_call_arr) > 0
-                                    tool_parser.prev_tool_call_arr) > 0 else 0
+                            index = len(tool_parser.prev_tool_call_arr
+                                        ) - 1 if auto_tools_called else 0
                        else:
                            index = 0
@@ -576,9 +578,7 @@ class OpenAIServingChat(OpenAIServing):
                            delta=delta_message,
                            logprobs=logprobs,
                            finish_reason=output.finish_reason
-                            if not (tool_parser
+                            if not auto_tools_called else "tool_calls",
-                                    and len(tool_parser.prev_tool_call_arr))
-                            else "tool_calls",
                            stop_reason=output.stop_reason)
                        chunk = ChatCompletionStreamResponse(
                            id=request_id,
@@ -680,8 +680,10 @@ class OpenAIServingChat(OpenAIServing):
            else:
                logprobs = None
-            # by default, tools are not used.
+            # In the OpenAI API the finish_reason is "tool_calls"
-            tools_called = False
+            # if the tool choice is auto and the model produced a tool
+            # call. The same is not true for named function calls
+            auto_tools_called = False
            # if auto tools are not enabled, and a named tool choice using
            #   outlines is not being used
@@ -703,7 +705,6 @@ class OpenAIServingChat(OpenAIServing):
                            name=request.tool_choice.function.name,
                            arguments=output.text))
                    ])
-                tools_called = True
            # if the request doesn't use tool choice
            # OR specifies to not use a tool
@@ -725,7 +726,10 @@ class OpenAIServingChat(OpenAIServing):
                tool_call_info = tool_parser.extract_tool_calls(
                    output.text, request=request)
-                tools_called = tool_call_info.tools_called
+                # In the OpenAI API the finish_reason is "tool_calls"
+                # if the tool choice is auto and the model produced a tool
+                # call. The same is not true for named function calls
+                auto_tools_called = tool_call_info.tools_called
                if tool_call_info.tools_called:
                    message = ChatMessage(role=role,
                                          content=tool_call_info.content,
@@ -748,7 +752,7 @@ class OpenAIServingChat(OpenAIServing):
                index=output.index,
                message=message,
                logprobs=logprobs,
-                finish_reason="tool_calls" if tools_called else
+                finish_reason="tool_calls" if auto_tools_called else
                output.finish_reason if output.finish_reason else "stop",
                stop_reason=output.stop_reason)
            choices.append(choice_data)