[Bugfix] [issue-21565] Fix the incompatibility issue with stream and named function calling when Thinking is disabled (#21573)

Signed-off-by: wangzi <3220100013@zju.edu.cn>
Co-authored-by: wangzi <3220100013@zju.edu.cn>
This commit is contained in:
Hongsheng Liu 2025-07-28 13:43:50 +08:00 committed by GitHub
parent 3ea57a56d9
commit 7656cf4cf3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 7 deletions

View File

@@ -1,6 +1,8 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Union
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import pytest_asyncio import pytest_asyncio
@@ -40,10 +42,17 @@ async def client(server):
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("stream", [True, False]) @pytest.mark.parametrize("stream", [True, False])
@pytest.mark.parametrize("tool_choice", ["auto", "required"]) @pytest.mark.parametrize("tool_choice", [
"auto", "required", {
"type": "function",
"function": {
"name": "get_current_weather"
}
}
])
@pytest.mark.parametrize("enable_thinking", [True, False]) @pytest.mark.parametrize("enable_thinking", [True, False])
async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str, async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
stream: bool, tool_choice: str, stream: bool, tool_choice: Union[str, dict],
enable_thinking: bool): enable_thinking: bool):
tools = [ tools = [
{ {

View File

@@ -623,7 +623,7 @@ class OpenAIServingChat(OpenAIServing):
# handle streaming deltas for tools with named tool_choice # handle streaming deltas for tools with named tool_choice
if tool_choice_function_name: if tool_choice_function_name:
if (self.reasoning_parser if (self.reasoning_parser and not reasoning_end_arr[i]
and not reasoning_parser.is_reasoning_end( and not reasoning_parser.is_reasoning_end(
previous_token_ids)): previous_token_ids)):
assert reasoning_parser is not None assert reasoning_parser is not None
@@ -637,11 +637,18 @@ class OpenAIServingChat(OpenAIServing):
current_token_ids, current_token_ids,
output.token_ids, output.token_ids,
)) ))
# When encountering think end id in delta_token_ids, # When encountering think end id in delta_token_ids
# process the `content`. Only keep 'content', # or think end id in prompt_token_ids
# remove 'reasoning_content' # i.e {"enable_thinking": False},
# set reasoning status to end.
# Only keep 'content', remove 'reasoning_content'.
if reasoning_parser.is_reasoning_end( if reasoning_parser.is_reasoning_end(
list(output.token_ids)): list(output.token_ids)) or \
(res.prompt_token_ids and
reasoning_parser.is_reasoning_end(
list(res.prompt_token_ids)
)):
reasoning_end_arr[i] = True
if delta_message and delta_message.content: if delta_message and delta_message.content:
# This need to be added to next `delta_text` # This need to be added to next `delta_text`
current_text = delta_message.content current_text = delta_message.content