diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py index eca048d855b5..a5b081f86107 100644 --- a/tests/entrypoints/openai/test_completion_with_function_calling.py +++ b/tests/entrypoints/openai/test_completion_with_function_calling.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import Union + import openai # use the official client for correctness check import pytest import pytest_asyncio @@ -40,10 +42,17 @@ async def client(server): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("stream", [True, False]) -@pytest.mark.parametrize("tool_choice", ["auto", "required"]) +@pytest.mark.parametrize("tool_choice", [ + "auto", "required", { + "type": "function", + "function": { + "name": "get_current_weather" + } + } +]) @pytest.mark.parametrize("enable_thinking", [True, False]) async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str, - stream: bool, tool_choice: str, + stream: bool, tool_choice: Union[str, dict], enable_thinking: bool): tools = [ { diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 832a3d501de0..e1d8a31672ed 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -623,7 +623,7 @@ class OpenAIServingChat(OpenAIServing): # handle streaming deltas for tools with named tool_choice if tool_choice_function_name: - if (self.reasoning_parser + if (self.reasoning_parser and not reasoning_end_arr[i] and not reasoning_parser.is_reasoning_end( previous_token_ids)): assert reasoning_parser is not None @@ -637,11 +637,18 @@ class OpenAIServingChat(OpenAIServing): current_token_ids, output.token_ids, )) - # When encountering think end id in delta_token_ids, - # process the `content`. Only keep 'content', - # remove 'reasoning_content' + # When encountering think end id in delta_token_ids + # or think end id in prompt_token_ids + # i.e {"enable_thinking": False}, + # set reasoning status to end. + # Only keep 'content', remove 'reasoning_content'. if reasoning_parser.is_reasoning_end( - list(output.token_ids)): + list(output.token_ids)) or \ + (res.prompt_token_ids and + reasoning_parser.is_reasoning_end( + list(res.prompt_token_ids) + )): + reasoning_end_arr[i] = True if delta_message and delta_message.content: # This need to be added to next `delta_text` current_text = delta_message.content