From 4de790fcad85abb0969da18bc9125889407c432a Mon Sep 17 00:00:00 2001
From: Chauncey
Date: Wed, 4 Jun 2025 07:27:24 +0800
Subject: [PATCH] [Bugfix]: Fix the incompatibility issue with tool_choice
 'required' when Thinking is enabled (#19075)

Signed-off-by: chaunceyjiang
---
 .../test_completion_with_function_calling.py |  2 +-
 vllm/entrypoints/openai/serving_chat.py      | 18 +++++++++++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py
index dbea2dc0b078..5c1f07832c2e 100644
--- a/tests/entrypoints/openai/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/test_completion_with_function_calling.py
@@ -9,7 +9,7 @@ import pytest_asyncio
 from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
-MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+MODEL_NAME = "Qwen/Qwen3-0.6B"
 
 
 @pytest.fixture(scope="module")
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 7e514d660be4..777b7f5bcde5 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -320,10 +320,13 @@ class OpenAIServingChat(OpenAIServing):
     def extract_tool_call_required_streaming(
         self,
         previous_text: str,
-        current_text: str,
+        current_text: Optional[str],
         delta_text: str,
         function_name_returned: bool,
     ) -> tuple[Optional[DeltaMessage], bool]:
+        if current_text is None or current_text == "":
+            # if the current text is empty, we cannot parse it
+            return None, function_name_returned
         try:
             obj = partial_json_parser.loads(current_text)
         except partial_json_parser.core.exceptions.MalformedJSON:
@@ -650,10 +653,18 @@ class OpenAIServingChat(OpenAIServing):
                 current_text = previous_text + delta_text
                 fn_name_returned = function_name_returned[i]
 
+                if self.reasoning_parser:
+                    _, content = \
+                        reasoning_parser.extract_reasoning_content(
+                            current_text,
+                            request
+                        )
+                else:
+                    content = current_text
                 delta_message, function_name_returned[i] = (
                     self.extract_tool_call_required_streaming(
                         previous_text=previous_text,
-                        current_text=current_text,
+                        current_text=content,
                         delta_text=delta_text,
                         function_name_returned=fn_name_returned))
 
@@ -981,8 +992,9 @@ class OpenAIServingChat(OpenAIServing):
 
             # the fields of FunctionDefinition are a superset of the
             # tool call outputs and can be used for parsing
+            assert content is not None
             tool_calls = TypeAdapter(
-                list[FunctionDefinition]).validate_json(output.text)
+                list[FunctionDefinition]).validate_json(content)
             message = ChatMessage(
                 role=role,
                 content="",