From 7656cf4cf32b122cb0bf59a17b97a30e777065e0 Mon Sep 17 00:00:00 2001
From: Hongsheng Liu <liuhongsheng4@huawei.com>
Date: Mon, 28 Jul 2025 13:43:50 +0800
Subject: [PATCH] [Bugfix] [issue-21565] Fix the incompatibility issue with
 stream and named function calling when Thinking is disabled (#21573)

Signed-off-by: wangzi <3220100013@zju.edu.cn>
Co-authored-by: wangzi <3220100013@zju.edu.cn>
---
 .../test_completion_with_function_calling.py    | 13 +++++++++++--
 vllm/entrypoints/openai/serving_chat.py         | 17 ++++++++++++-----
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py
index eca048d855b5..a5b081f86107 100644
--- a/tests/entrypoints/openai/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/test_completion_with_function_calling.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from typing import Union
+
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
@@ -40,10 +42,17 @@ async def client(server):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 @pytest.mark.parametrize("stream", [True, False])
-@pytest.mark.parametrize("tool_choice", ["auto", "required"])
+@pytest.mark.parametrize("tool_choice", [
+    "auto", "required", {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather"
+        }
+    }
+])
 @pytest.mark.parametrize("enable_thinking", [True, False])
 async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
-                                 stream: bool, tool_choice: str,
+                                 stream: bool, tool_choice: Union[str, dict],
                                  enable_thinking: bool):
     tools = [
         {
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 832a3d501de0..e1d8a31672ed 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -623,7 +623,7 @@ class OpenAIServingChat(OpenAIServing):
 
                     # handle streaming deltas for tools with named tool_choice
                     if tool_choice_function_name:
-                        if (self.reasoning_parser
+                        if (self.reasoning_parser and not reasoning_end_arr[i]
                                 and not reasoning_parser.is_reasoning_end(
                                     previous_token_ids)):
                             assert reasoning_parser is not None
@@ -637,11 +637,18 @@ class OpenAIServingChat(OpenAIServing):
                                     current_token_ids,
                                     output.token_ids,
                                 ))
-                            # When encountering think end id in delta_token_ids,
-                            # process the `content`. Only keep 'content',
-                            # remove 'reasoning_content'
+                            # Reasoning has ended if the think end id is in
+                            # delta_token_ids, or already in prompt_token_ids
+                            # (e.g. with {"enable_thinking": False}).
+                            # Mark reasoning as ended, then only keep
+                            # 'content' and remove 'reasoning_content'.
                             if reasoning_parser.is_reasoning_end(
-                                    list(output.token_ids)):
+                                    list(output.token_ids)) or \
+                                    (res.prompt_token_ids and
+                                        reasoning_parser.is_reasoning_end(
+                                            list(res.prompt_token_ids)
+                                        )):
+                                reasoning_end_arr[i] = True
                                 if delta_message and delta_message.content:
                                     # This need to be added to next `delta_text`
                                     current_text = delta_message.content