[Bugfix] [Frontend] Cleanup gpt-oss non-streaming chat tool calls (#25514)

Signed-off-by: Ben Browning <bbrownin@redhat.com> Signed-off-by: yewentao256 <zhyanwentao@126.com>
2026-05-02 20:57:58 +08:00 · 2025-09-23 23:20:38 -04:00 · 2025-09-23 23:20:38 -04:00 · d7fb5a4ae8
commit d7fb5a4ae8
parent f52b991db6
4 changed files with 102 additions and 12 deletions
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -194,6 +194,7 @@ async def test_gpt_oss_multi_turn_chat(gptoss_client: OpenAI,
    assert tc.function is not None and tc.function.name == "get_current_weather"
    args1 = tc.function.arguments
    assert args1 is not None and len(args1) > 0
    assert not first_msg.content
    messages.append({"role": "assistant", "content": args1})
    messages.append({
--- a/tests/tool_use/test_openai_tool_parser.py
+++ b/tests/tool_use/test_openai_tool_parser.py
@@ -70,7 +70,12 @@ def test_extract_tool_calls_no_tools(openai_tool_parser, harmony_encoding):
    assert extracted_info.content == "This is a test"
-def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding):
+@pytest.mark.parametrize("tool_args", [
    '{"location": "Tokyo"}',
    '{\n"location": "Tokyo"\n}',
 ])
 def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding,
                                        tool_args):
    convo = Conversation.from_messages([
        Message.from_role_and_content(Role.USER,
                                      "What is the weather in Tokyo?"),
@@ -80,7 +85,7 @@ def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding):
        ).with_channel("analysis"),
        Message.from_role_and_content(
            Role.ASSISTANT,
-            '{"location": "Tokyo"}').with_channel("commentary").with_recipient(
+            tool_args).with_channel("commentary").with_recipient(
                "functions.get_current_weather").with_content_type("json"),
    ])
    token_ids = harmony_encoding.render_conversation_for_completion(
@@ -121,6 +126,17 @@ def test_extract_tool_calls_multiple_tools(
            Role.ASSISTANT,
            '{"location": "Tokyo"}').with_channel("commentary").with_recipient(
                "functions.get_user_location").with_content_type("json"),
        Message.from_role_and_content(
            Role.ASSISTANT, '{"location": "Tokyo"}').with_channel(
                "commentary").with_recipient("functions.no_content_type"),
        Message.from_role_and_content(Role.ASSISTANT, "foo").with_channel(
            "commentary").with_recipient("functions.not_json_no_content_type"),
        Message.from_role_and_content(
            Role.ASSISTANT, '{}').with_channel("commentary").with_recipient(
                "functions.empty_args").with_content_type("json"),
        Message.from_role_and_content(
            Role.ASSISTANT, '').with_channel("commentary").with_recipient(
                "functions.no_args").with_content_type("json"),
    ])
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo,
@@ -141,7 +157,63 @@ def test_extract_tool_calls_multiple_tools(
        ToolCall(function=FunctionCall(
            name="get_user_location",
            arguments=json.dumps({"location": "Tokyo"}),
        )),
        ToolCall(function=FunctionCall(
            name="no_content_type",
            arguments=json.dumps({"location": "Tokyo"}),
        )),
        ToolCall(function=FunctionCall(
            name="not_json_no_content_type",
            arguments="foo",
        )),
        ToolCall(function=FunctionCall(
            name="empty_args",
            arguments=json.dumps({}),
        )),
        ToolCall(function=FunctionCall(
            name="no_args",
            arguments="",
        ))
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
    assert extracted_info.content is None
 def test_extract_tool_calls_with_content(
    openai_tool_parser,
    harmony_encoding,
 ):
    """Check that a tool call and final-channel text are both extracted.

    Builds a conversation whose assistant turn contains an "analysis"
    message, a "commentary" message addressed to
    ``functions.get_current_weather`` with a JSON content type, and a
    "final" message. The rendered token ids are handed to
    ``extract_tool_calls`` and the test asserts that the single tool call
    is reported and that ``content`` equals the final-channel text.
    """
    final_content = "This tool call will get the weather."
    convo = Conversation.from_messages([
        Message.from_role_and_content(
            Role.USER, "What is the weather in Tokyo based on where I'm at?"),
        # Message on the "analysis" channel; not expected in the extracted
        # content asserted below.
        Message.from_role_and_content(
            Role.ASSISTANT,
            'User asks: "What is the weather in Tokyo?" based on their location. We need to use get_current_weather tool and get_user_location tool.',  #  noqa: E501
        ).with_channel("analysis"),
        # Tool call: "commentary" channel, recipient prefixed with
        # "functions.", JSON arguments.
        Message.from_role_and_content(
            Role.ASSISTANT,
            '{"location": "Tokyo"}').with_channel("commentary").with_recipient(
                "functions.get_current_weather").with_content_type("json"),
        # Text on the "final" channel, expected back as the content.
        Message.from_role_and_content(Role.ASSISTANT,
                                      final_content).with_channel("final"),
    ])
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo,
        Role.ASSISTANT,
    )
    # The text argument is an empty string and request is None; only the
    # token ids carry the conversation into the parser here.
    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )
    assert extracted_info.tools_called
    expected_tool_calls = [
        ToolCall(function=FunctionCall(
            name="get_current_weather",
            arguments=json.dumps({"location": "Tokyo"}),
        )),
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
    # The final-channel text must be returned alongside the tool call.
    assert extracted_info.content == final_content
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -1186,6 +1186,10 @@ class OpenAIServingChat(OpenAIServing):
                logprobs = None
            if self.use_harmony:
                reasoning_content, content, _ = parse_chat_output(token_ids)
                if not request.include_reasoning:
                    reasoning_content = None
                if self.tool_parser is not None:
                    tool_parser = self.tool_parser(tokenizer)
                    # NOTE: We use token_ids for openai tool parser
@@ -1194,10 +1198,7 @@ class OpenAIServingChat(OpenAIServing):
                        request=request,
                        token_ids=token_ids,  # type: ignore
                    )
-                    reasoning_content, content = None, tool_call_info.content
+                    content = tool_call_info.content
                    if request.include_reasoning:
                        reasoning_content, content, _ = parse_chat_output(
                            token_ids)
                    message = ChatMessage(
                        role=role,
                        reasoning_content=reasoning_content,
@@ -1205,10 +1206,6 @@ class OpenAIServingChat(OpenAIServing):
                        tool_calls=tool_call_info.tool_calls,
                    )
                else:
                    reasoning_content, content, _ = parse_chat_output(
                        token_ids)
                    if not request.include_reasoning:
                        reasoning_content = None
                    message = ChatMessage(
                        role=role,
                        reasoning_content=reasoning_content,
--- a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from __future__ import annotations
 import json
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
@@ -12,10 +13,13 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                              FunctionCall, ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
    ToolParser, ToolParserManager)
 from vllm.logger import init_logger
 if TYPE_CHECKING:
    from vllm.transformers_utils.tokenizer import AnyTokenizer
 logger = init_logger(__name__)
@ToolParserManager.register_module("openai")
 class OpenAIToolParser(ToolParser):
@@ -40,17 +44,33 @@ class OpenAIToolParser(ToolParser):
        if len(parser.messages) > 0:
            for msg in parser.messages:
                if len(msg.content) < 1:
                    continue
                msg_text = msg.content[0].text
                if msg.recipient and msg.recipient.startswith("functions."):
                    # If no content-type is given assume JSON, as that's the
                    # most common case with gpt-oss models.
                    if not msg.content_type or "json" in msg.content_type:
                        # load and dump the JSON text to check validity and
                        # remove any extra newlines or other odd formatting
                        try:
                            tool_args = json.dumps(json.loads(msg_text))
                        except json.JSONDecodeError:
                            logger.exception(
                                "Error decoding JSON tool call from response.")
                            tool_args = msg_text
                    else:
                        tool_args = msg_text
                    tool_calls.append(
                        ToolCall(
                            type="function",
                            function=FunctionCall(
                                name=msg.recipient.split("functions.")[1],
-                                arguments=msg.content[0].text,
+                                arguments=tool_args,
                            ),
                        ))
                elif msg.channel == "final":
-                    final_content = msg.content[0].text
+                    final_content = msg_text
        return ExtractedToolCallInformation(
            tools_called=len(tool_calls) > 0,