[responsesAPI][6] Fix multi turn MCP tokenization (#30230)

Signed-off-by: Andrew Xia <axia@fb.com>
Co-authored-by: Andrew Xia <axia@fb.com>

parent abe93bce59
commit c3487aca34
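In short: before this change, every response item was mapped one-to-one onto a chat message, so on a follow-up turn a reasoning item and the MCP tool call it produced were re-tokenized as two separate assistant messages. The new `_maybe_combine_reasoning_and_tool_call` helper folds the call into the preceding reasoning message instead. A minimal standalone sketch of that behavior, using plain dicts rather than vLLM's typed `ChatCompletionMessageParam` (names and shapes simplified, not the actual vLLM API):

```python
# Minimal sketch of the merge this commit introduces, using plain dicts
# rather than vLLM's typed ChatCompletionMessageParam (illustrative only;
# MCP_PREFIX mirrors the new constant added in the diff below).
MCP_PREFIX = "mcp_"

def maybe_combine_reasoning_and_tool_call(item: dict, messages: list) -> dict | None:
    """Return the merged last message if `item` is an MCP tool call that
    directly follows an assistant reasoning message, else None."""
    if not item.get("id", "").startswith(MCP_PREFIX):
        return None
    if not messages:
        return None
    last = messages[-1]
    if last.get("role") != "assistant" or last.get("reasoning") is None:
        return None
    # Attach the call to the reasoning message instead of appending a new one.
    last["tool_calls"] = [{
        "id": item["call_id"],
        "type": "function",
        "function": {"name": item["name"], "arguments": item["arguments"]},
    }]
    return last

# Two response items collapse into a single assistant chat message:
messages = [{"role": "assistant", "reasoning": "Leroy Jenkins"}]
call = {"id": "mcp_123", "call_id": "call_123",
        "name": "python", "arguments": '{"code": "123+456"}'}
assert maybe_combine_reasoning_and_tool_call(call, messages) is not None
assert len(messages) == 1 and messages[0]["tool_calls"][0]["id"] == "call_123"
```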
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import pytest
+from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
 from openai.types.responses.response_function_tool_call_output_item import (
     ResponseFunctionToolCallOutputItem,
 )
@@ -14,7 +15,8 @@ from openai.types.responses.response_reasoning_item import (
 )
 
 from vllm.entrypoints.responses_utils import (
-    construct_chat_message_with_tool_call,
+    _construct_single_message_from_response_item,
+    construct_chat_messages_with_tool_call,
     convert_tool_responses_to_completions_format,
 )
 
@@ -42,7 +44,43 @@ class TestResponsesUtils:
 
         assert result == {"type": "function", "function": input_tool}
 
-    def test_construct_chat_message_with_tool_call(self):
+    def test_construct_chat_messages_with_tool_call(self):
+        """Test construction of chat messages with tool calls."""
+        reasoning_item = ResponseReasoningItem(
+            id="lol",
+            summary=[],
+            type="reasoning",
+            content=[
+                Content(
+                    text="Leroy Jenkins",
+                    type="reasoning_text",
+                )
+            ],
+            encrypted_content=None,
+            status=None,
+        )
+        mcp_tool_item = ResponseFunctionToolCall(
+            id="mcp_123",
+            call_id="call_123",
+            type="function_call",
+            status="completed",
+            name="python",
+            arguments='{"code": "123+456"}',
+        )
+        input_items = [reasoning_item, mcp_tool_item]
+        messages = construct_chat_messages_with_tool_call(input_items)
+
+        assert len(messages) == 1
+        message = messages[0]
+        assert message["role"] == "assistant"
+        assert message["reasoning"] == "Leroy Jenkins"
+        assert message["tool_calls"][0]["id"] == "call_123"
+        assert message["tool_calls"][0]["function"]["name"] == "python"
+        assert (
+            message["tool_calls"][0]["function"]["arguments"] == '{"code": "123+456"}'
+        )
+
+    def test_construct_single_message_from_response_item(self):
         item = ResponseReasoningItem(
             id="lol",
             summary=[],
@@ -56,7 +94,7 @@ class TestResponsesUtils:
             encrypted_content=None,
             status=None,
         )
-        formatted_item = construct_chat_message_with_tool_call(item)
+        formatted_item = _construct_single_message_from_response_item(item)
        assert formatted_item["role"] == "assistant"
         assert formatted_item["reasoning"] == "Leroy Jenkins"
 
@@ -74,7 +112,7 @@ class TestResponsesUtils:
             status=None,
         )
 
-        formatted_item = construct_chat_message_with_tool_call(item)
+        formatted_item = _construct_single_message_from_response_item(item)
         assert formatted_item["role"] == "assistant"
         assert (
             formatted_item["reasoning"]
@@ -88,7 +126,7 @@ class TestResponsesUtils:
             output="1234",
             status="completed",
         )
-        formatted_item = construct_chat_message_with_tool_call(tool_call_output)
+        formatted_item = _construct_single_message_from_response_item(tool_call_output)
         assert formatted_item["role"] == "tool"
         assert formatted_item["content"] == "1234"
         assert formatted_item["tool_call_id"] == "temp"
@@ -102,7 +140,7 @@ class TestResponsesUtils:
             status=None,
         )
         with pytest.raises(ValueError):
-            construct_chat_message_with_tool_call(item)
+            _construct_single_message_from_response_item(item)
 
         output_item = ResponseOutputMessage(
             id="msg_bf585bbbe3d500e0",
@@ -119,6 +157,6 @@ class TestResponsesUtils:
             type="message",
         )
 
-        formatted_item = construct_chat_message_with_tool_call(output_item)
+        formatted_item = _construct_single_message_from_response_item(output_item)
         assert formatted_item["role"] == "assistant"
         assert formatted_item["content"] == "dongyi"
@@ -8,3 +8,5 @@ Shared constants for vLLM entrypoints.
 # These constants help mitigate header abuse attacks
 H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT = 4194304  # 4 MB
 H11_MAX_HEADER_COUNT_DEFAULT = 256
+
+MCP_PREFIX = "mcp_"
@@ -19,6 +19,7 @@ from vllm import envs
 from vllm.entrypoints.chat_utils import (
     ChatTemplateContentFormatOption,
 )
+from vllm.entrypoints.constants import MCP_PREFIX
 from vllm.entrypoints.openai.parser.harmony_utils import (
     get_encoding,
     get_streamable_parser_for_assistant,
@@ -303,7 +304,7 @@ class ParsableContext(ConversationContext):
         result_str = result.content[0].text
 
         message = ResponseFunctionToolCallOutputItem(
-            id=f"fco_{random_uuid()}",
+            id=f"mcpo_{random_uuid()}",
             type="function_call_output",
             call_id=f"call_{random_uuid()}",
             output=result_str,
@@ -385,6 +386,9 @@ class ParsableContext(ConversationContext):
         if not self.parser.response_messages:
             return []
         last_msg = self.parser.response_messages[-1]
+        # tag this as an mcp_ function call so later turns can recognize it
+        last_msg.id = f"{MCP_PREFIX}{random_uuid()}"
+        self.parser.response_messages[-1] = last_msg
         if last_msg.name == "code_interpreter":
             return await self.call_python_tool(self._tool_sessions["python"], last_msg)
         elif last_msg.name == "web_search_preview":
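The `ParsableContext` changes above are the producer side of the contract: the id of a built-in tool call is rewritten to carry the `mcp_` prefix, which is exactly what `_maybe_combine_reasoning_and_tool_call` keys on when the conversation is fed back in on the next turn. A tiny sketch of that round trip (stdlib `uuid` standing in for vLLM's `random_uuid` helper):

```python
# Illustrative round trip for the mcp_ id tagging.
from uuid import uuid4

MCP_PREFIX = "mcp_"

def random_uuid() -> str:  # stand-in for vLLM's helper
    return uuid4().hex

# Turn N: the context tags the tool-call message before running the MCP tool.
tagged_id = f"{MCP_PREFIX}{random_uuid()}"

# Turn N+1: the input-message builder recognizes the tag, so the call is
# merged into the preceding reasoning message instead of becoming a
# second assistant message.
assert tagged_id.startswith(MCP_PREFIX)
```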
@@ -1339,6 +1339,7 @@ class OpenAIServing:
         )
         engine_prompt = engine_prompts[0]
         request_prompt = request_prompts[0]
+        prompt_text, _, _ = self._get_prompt_components(request_prompt)
 
         # Update the sampling params.
         sampling_params.max_tokens = self.max_model_len - len(
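The `OpenAIServing` hunk extracts `prompt_text` so the token-budget update that follows can account for the actual multi-turn prompt. The continuation of the `len(` call sits outside the hunk, but the arithmetic is presumably of this shape (a sketch, assuming the subtracted length is the tokenized prompt):

```python
# Hypothetical numbers; the point is that max_tokens must shrink as the
# multi-turn prompt grows, or generation could overrun the context window.
max_model_len = 8192
prompt_token_ids = [0] * 1500  # stand-in for the tokenized multi-turn prompt

max_tokens = max_model_len - len(prompt_token_ids)
assert max_tokens == 6692
```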
@@ -22,6 +22,7 @@ from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from openai.types.responses.tool import Tool
 
 from vllm import envs
+from vllm.entrypoints.constants import MCP_PREFIX
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionMessageParam,
     ResponseInputOutputItem,
@@ -44,13 +45,13 @@ def make_response_output_items_from_parsable_context(
         )
         if isinstance(output_messages[-1], ResponseFunctionToolCall):
             mcp_message = McpCall(
-                id=f"mcp_{random_uuid()}",
+                id=f"{MCP_PREFIX}{random_uuid()}",
                 arguments=output_messages[-1].arguments,
                 name=output_messages[-1].name,
                 server_label=output_messages[
                     -1
                 ].name,  # TODO: store the server label
-                type="mcp_call",
+                type=f"{MCP_PREFIX}call",
                 status="completed",
                 output=message.output,
                 # TODO: support error output
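Note that `f"{MCP_PREFIX}call"` still evaluates to the literal `"mcp_call"`, so the output type on the wire is unchanged; the rewrite only derives the string from the shared constant:

```python
MCP_PREFIX = "mcp_"
assert f"{MCP_PREFIX}call" == "mcp_call"
```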
@@ -98,12 +99,63 @@ def construct_input_messages(
     if isinstance(request_input, str):
         messages.append({"role": "user", "content": request_input})
     else:
-        for item in request_input:
-            messages.append(construct_chat_message_with_tool_call(item))
+        input_messages = construct_chat_messages_with_tool_call(request_input)
+        messages.extend(input_messages)
     return messages
 
 
-def construct_chat_message_with_tool_call(
+def _maybe_combine_reasoning_and_tool_call(
+    item: ResponseInputOutputItem, messages: list[ChatCompletionMessageParam]
+) -> ChatCompletionMessageParam | None:
+    """Many models treat MCP calls and reasoning as a single message.
+    This function checks whether the last message is a reasoning message
+    and the current item is an MCP tool call, and merges them if so."""
+    if not (
+        isinstance(item, ResponseFunctionToolCall) and item.id.startswith(MCP_PREFIX)
+    ):
+        return None
+    if len(messages) == 0:
+        return None
+    last_message = messages[-1]
+    if not (
+        last_message.get("role") == "assistant"
+        and last_message.get("reasoning") is not None
+    ):
+        return None
+
+    last_message["tool_calls"] = [
+        ChatCompletionMessageToolCallParam(
+            id=item.call_id,
+            function=FunctionCallTool(
+                name=item.name,
+                arguments=item.arguments,
+            ),
+            type="function",
+        )
+    ]
+    return last_message
+
+
+def construct_chat_messages_with_tool_call(
+    input_messages: list[ResponseInputOutputItem],
+) -> list[ChatCompletionMessageParam]:
+    """Wraps _construct_single_message_from_response_item, because some
+    chat messages are built from multiple response items: for example,
+    a reasoning item and an MCP tool call are two response items
+    but one chat message.
+    """
+    messages: list[ChatCompletionMessageParam] = []
+    for item in input_messages:
+        maybe_combined_message = _maybe_combine_reasoning_and_tool_call(item, messages)
+        if maybe_combined_message is not None:
+            messages[-1] = maybe_combined_message
+        else:
+            messages.append(_construct_single_message_from_response_item(item))
+
+    return messages
+
+
+def _construct_single_message_from_response_item(
     item: ResponseInputOutputItem,
 ) -> ChatCompletionMessageParam:
     if isinstance(item, ResponseFunctionToolCall):
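Putting the pieces together: for a reasoning item, its MCP tool call, and the tool's output, the converted chat messages should come out shaped roughly like this (plain dicts standing in for the typed params, with values borrowed from the new test; illustrative, not the exact vLLM objects):

```python
# Expected chat-message shape for one MCP turn after this fix (illustrative).
expected_messages = [
    {
        "role": "assistant",
        "reasoning": "Leroy Jenkins",  # from the reasoning item
        "tool_calls": [  # the MCP function call, merged into the same message
            {
                "id": "call_123",
                "type": "function",
                "function": {"name": "python", "arguments": '{"code": "123+456"}'},
            }
        ],
    },
    {
        "role": "tool",  # from the function_call_output item
        "content": "1234",
        "tool_call_id": "call_123",
    },
]
# Three response items, two chat messages: the reasoning item and the tool
# call no longer double-count during re-tokenization.
assert len(expected_messages) == 2
```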