From 7656cf4cf32b122cb0bf59a17b97a30e777065e0 Mon Sep 17 00:00:00 2001 From: Hongsheng Liu Date: Mon, 28 Jul 2025 13:43:50 +0800 Subject: [PATCH] [Bugfix] [issue-21565] Fix the incompatibility issue with stream and named function calling when Thinking is disabled (#21573) Signed-off-by: wangzi <3220100013@zju.edu.cn> Co-authored-by: wangzi <3220100013@zju.edu.cn> --- .../test_completion_with_function_calling.py | 13 +++++++++++-- vllm/entrypoints/openai/serving_chat.py | 17 ++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py index eca048d855b5..a5b081f86107 100644 --- a/tests/entrypoints/openai/test_completion_with_function_calling.py +++ b/tests/entrypoints/openai/test_completion_with_function_calling.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import Union + import openai # use the official client for correctness check import pytest import pytest_asyncio @@ -40,10 +42,17 @@ async def client(server): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("stream", [True, False]) -@pytest.mark.parametrize("tool_choice", ["auto", "required"]) +@pytest.mark.parametrize("tool_choice", [ + "auto", "required", { + "type": "function", + "function": { + "name": "get_current_weather" + } + } +]) @pytest.mark.parametrize("enable_thinking", [True, False]) async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str, - stream: bool, tool_choice: str, + stream: bool, tool_choice: Union[str, dict], enable_thinking: bool): tools = [ { diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 832a3d501de0..e1d8a31672ed 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -623,7 +623,7 @@ class OpenAIServingChat(OpenAIServing): # handle streaming deltas for tools with named tool_choice if tool_choice_function_name: - if (self.reasoning_parser + if (self.reasoning_parser and not reasoning_end_arr[i] and not reasoning_parser.is_reasoning_end( previous_token_ids)): assert reasoning_parser is not None @@ -637,11 +637,18 @@ class OpenAIServingChat(OpenAIServing): current_token_ids, output.token_ids, )) - # When encountering think end id in delta_token_ids, - # process the `content`. Only keep 'content', - # remove 'reasoning_content' + # When encountering think end id in delta_token_ids + # or think end id in prompt_token_ids + # i.e {"enable_thinking": False}, + # set reasoning status to end. + # Only keep 'content', remove 'reasoning_content'. if reasoning_parser.is_reasoning_end( - list(output.token_ids)): + list(output.token_ids)) or \ + (res.prompt_token_ids and + reasoning_parser.is_reasoning_end( + list(res.prompt_token_ids) + )): + reasoning_end_arr[i] = True if delta_message and delta_message.content: # This need to be added to next `delta_text` current_text = delta_message.content