# Mirror of https://git.datalinker.icu/vllm-project/vllm.git
# Synced 2025-12-10 06:35:00 +08:00
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
|
|
import json
|
|
|
|
import pytest
|
|
from openai_harmony import (
|
|
Conversation,
|
|
DeveloperContent,
|
|
HarmonyEncodingName,
|
|
Message,
|
|
Role,
|
|
SystemContent,
|
|
load_harmony_encoding,
|
|
)
|
|
|
|
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
|
from vllm.entrypoints.openai.tool_parsers import OpenAIToolParser
|
|
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
|
|
MODEL = "gpt2"
|
|
|
|
|
|
@pytest.fixture(scope="module")
def openai_tokenizer():
    """Module-scoped tokenizer fixture.

    The parser never actually uses the tokenizer, but the
    OpenAIToolParser constructor requires one.
    """
    return get_tokenizer(MODEL)
@pytest.fixture
def openai_tool_parser(openai_tokenizer):
    """Provide a fresh OpenAIToolParser instance for each test."""
    parser = OpenAIToolParser(openai_tokenizer)
    return parser
|
@pytest.fixture(scope="module")
def harmony_encoding():
    """Load the Harmony GPT-OSS encoding once per test module."""
    return load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
def assert_tool_calls(
    actual_tool_calls: list[ToolCall],
    expected_tool_calls: list[ToolCall],
):
    """Compare parsed tool calls against expectations.

    Ids are generated by the parser, so only their shape is checked;
    the function payloads must match exactly.
    """
    assert len(actual_tool_calls) == len(expected_tool_calls)

    for actual, expected in zip(actual_tool_calls, expected_tool_calls):
        # The id is auto-generated; sanity-check type and length only.
        assert isinstance(actual.id, str)
        assert len(actual.id) > 16  # Default from protocol.py
        assert actual.type == "function"
        assert actual.function == expected.function
|
def test_extract_tool_calls_no_tools(openai_tool_parser, harmony_encoding):
    """A plain final-channel reply yields content and no tool calls."""
    messages = [
        Message.from_role_and_content(Role.SYSTEM, SystemContent.new()),
        Message.from_role_and_content(
            Role.DEVELOPER,
            DeveloperContent.new().with_instructions("Talk like a pirate!"),
        ),
        Message.from_role_and_content(Role.USER, "Arrr, how be you?"),
        Message.from_role_and_content(
            Role.ASSISTANT, "This is a test"
        ).with_channel("final"),
    ]
    token_ids = harmony_encoding.render_conversation_for_completion(
        Conversation.from_messages(messages), Role.ASSISTANT
    )
    extracted_info = openai_tool_parser.extract_tool_calls(
        "", request=None, token_ids=token_ids
    )
    assert not extracted_info.tools_called
    assert extracted_info.tool_calls == []
    assert extracted_info.content == "This is a test"
|
@pytest.mark.parametrize(
    "tool_args",
    [
        '{"location": "Tokyo"}',
        '{\n"location": "Tokyo"\n}',
    ],
)
def test_extract_tool_calls_single_tool(
    openai_tool_parser, harmony_encoding, tool_args
):
    """One commentary-channel function call parses into a single ToolCall.

    Parametrized over compact and pretty-printed JSON arguments; both
    normalize to the same compact form.
    """
    messages = [
        Message.from_role_and_content(Role.USER, "What is the weather in Tokyo?"),
        Message.from_role_and_content(
            Role.ASSISTANT,
            'User asks: "What is the weather in Tokyo?" We need to use get_current_weather tool.',  # noqa: E501
        ).with_channel("analysis"),
        Message.from_role_and_content(Role.ASSISTANT, tool_args)
        .with_channel("commentary")
        .with_recipient("functions.get_current_weather")
        .with_content_type("json"),
    ]
    token_ids = harmony_encoding.render_conversation_for_completion(
        Conversation.from_messages(messages), Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "", request=None, token_ids=token_ids
    )
    assert extracted_info.tools_called
    expected_tool_calls = [
        ToolCall(
            function=FunctionCall(
                name="get_current_weather",
                arguments=json.dumps({"location": "Tokyo"}),
            )
        )
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
    assert extracted_info.content is None
|
def test_extract_tool_calls_multiple_tools(
    openai_tool_parser,
    harmony_encoding,
):
    """Multiple commentary-channel calls all parse, covering the edge cases:

    - explicit "json" content type
    - missing content type with JSON body
    - missing content type with non-JSON body
    - empty JSON object arguments
    - entirely empty arguments
    """
    tokyo_json = '{"location": "Tokyo"}'
    messages = [
        Message.from_role_and_content(
            Role.USER, "What is the weather in Tokyo based on where I'm at?"
        ),
        Message.from_role_and_content(
            Role.ASSISTANT,
            'User asks: "What is the weather in Tokyo?" based on their location. We need to use get_current_weather tool and get_user_location tool.',  # noqa: E501
        ).with_channel("analysis"),
        Message.from_role_and_content(Role.ASSISTANT, tokyo_json)
        .with_channel("commentary")
        .with_recipient("functions.get_current_weather")
        .with_content_type("json"),
        Message.from_role_and_content(Role.ASSISTANT, tokyo_json)
        .with_channel("commentary")
        .with_recipient("functions.get_user_location")
        .with_content_type("json"),
        Message.from_role_and_content(Role.ASSISTANT, tokyo_json)
        .with_channel("commentary")
        .with_recipient("functions.no_content_type"),
        Message.from_role_and_content(Role.ASSISTANT, "foo")
        .with_channel("commentary")
        .with_recipient("functions.not_json_no_content_type"),
        Message.from_role_and_content(Role.ASSISTANT, "{}")
        .with_channel("commentary")
        .with_recipient("functions.empty_args")
        .with_content_type("json"),
        Message.from_role_and_content(Role.ASSISTANT, "")
        .with_channel("commentary")
        .with_recipient("functions.no_args")
        .with_content_type("json"),
    ]
    token_ids = harmony_encoding.render_conversation_for_completion(
        Conversation.from_messages(messages),
        Role.ASSISTANT,
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "", request=None, token_ids=token_ids
    )
    assert extracted_info.tools_called
    # (function name, expected argument string) in call order.
    expected_tool_calls = [
        ToolCall(function=FunctionCall(name=name, arguments=args))
        for name, args in [
            ("get_current_weather", json.dumps({"location": "Tokyo"})),
            ("get_user_location", json.dumps({"location": "Tokyo"})),
            ("no_content_type", json.dumps({"location": "Tokyo"})),
            ("not_json_no_content_type", "foo"),
            ("empty_args", json.dumps({})),
            ("no_args", ""),
        ]
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
    assert extracted_info.content is None
|
def test_extract_tool_calls_with_content(
    openai_tool_parser,
    harmony_encoding,
):
    """A tool call followed by a final-channel message yields both the
    parsed tool call and the final text as content."""
    final_content = "This tool call will get the weather."
    messages = [
        Message.from_role_and_content(
            Role.USER, "What is the weather in Tokyo based on where I'm at?"
        ),
        Message.from_role_and_content(
            Role.ASSISTANT,
            'User asks: "What is the weather in Tokyo?" based on their location. We need to use get_current_weather tool and get_user_location tool.',  # noqa: E501
        ).with_channel("analysis"),
        Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
        .with_channel("commentary")
        .with_recipient("functions.get_current_weather")
        .with_content_type("json"),
        Message.from_role_and_content(Role.ASSISTANT, final_content).with_channel(
            "final"
        ),
    ]
    token_ids = harmony_encoding.render_conversation_for_completion(
        Conversation.from_messages(messages),
        Role.ASSISTANT,
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "", request=None, token_ids=token_ids
    )
    assert extracted_info.tools_called
    expected_tool_calls = [
        ToolCall(
            function=FunctionCall(
                name="get_current_weather",
                arguments=json.dumps({"location": "Tokyo"}),
            )
        ),
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
    assert extracted_info.content == final_content