mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 12:15:40 +08:00
[Frontend] Add MCP type support infrastructure to Responses API (#30054)
Signed-off-by: Daniel Salib <danielsalib@meta.com>
This commit is contained in:
parent
af0444bf40
commit
444f0e3f33
@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
|
||||
from openai.types.responses.response_output_item import McpCall
|
||||
from openai_harmony import Author, Message, Role, TextContent
|
||||
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
@ -400,17 +401,19 @@ class TestParseOutputMessage:
|
||||
assert output_items[0].arguments == '{"location": "San Francisco"}'
|
||||
assert output_items[1].arguments == '{"location": "New York"}'
|
||||
|
||||
def test_commentary_with_unknown_recipient_raises_error(self):
|
||||
"""Test that commentary with unknown recipient raises ValueError."""
|
||||
message = Message.from_role_and_content(Role.ASSISTANT, "some content")
|
||||
def test_commentary_with_unknown_recipient_creates_mcp_call(self):
|
||||
"""Test that commentary with unknown recipient creates MCP call."""
|
||||
message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
|
||||
message = message.with_channel("commentary")
|
||||
message = message.with_recipient("unknown_recipient")
|
||||
message = message.with_recipient("custom_tool")
|
||||
|
||||
try:
|
||||
parse_output_message(message)
|
||||
raise AssertionError("Expected ValueError to be raised")
|
||||
except ValueError as e:
|
||||
assert "Unknown recipient: unknown_recipient" in str(e)
|
||||
output_items = parse_output_message(message)
|
||||
|
||||
assert len(output_items) == 1
|
||||
assert isinstance(output_items[0], McpCall)
|
||||
assert output_items[0].type == "mcp_call"
|
||||
assert output_items[0].name == "custom_tool"
|
||||
assert output_items[0].server_label == "custom_tool"
|
||||
|
||||
def test_analysis_channel_creates_reasoning(self):
|
||||
"""Test that analysis channel creates reasoning items."""
|
||||
@ -451,3 +454,167 @@ def test_has_custom_tools() -> None:
|
||||
assert has_custom_tools(
|
||||
{"web_search_preview", "code_interpreter", "container", "others"}
|
||||
)
|
||||
|
||||
|
||||
def test_parse_mcp_call_basic() -> None:
    """An undotted commentary recipient is parsed into a single MCP call.

    The recipient string is expected to be used for both ``name`` and
    ``server_label``, the message text becomes ``arguments``, and the item is
    reported as ``completed``.
    """
    mcp_message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    parsed = parse_output_message(mcp_message)

    assert len(parsed) == 1
    call = parsed[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == '{"path": "/tmp"}'
    assert call.status == "completed"
|
||||
|
||||
|
||||
def test_parse_mcp_call_dotted_recipient() -> None:
    """A ``server.tool`` recipient is split into server label and tool name."""
    dotted_message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    parsed = parse_output_message(dotted_message)

    assert len(parsed) == 1
    call = parsed[0]
    assert isinstance(call, McpCall)
    # The part after the dot is the tool; the part before it is the server.
    assert call.name == "list"
    assert call.server_label == "repo_browser"
|
||||
|
||||
|
||||
def test_mcp_vs_function_call() -> None:
    """A ``functions.*`` commentary recipient stays a function call, not MCP."""
    function_message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    parsed = parse_output_message(function_message)

    assert len(parsed) == 1
    item = parsed[0]
    assert not isinstance(item, McpCall)
    assert item.type == "function_call"
|
||||
|
||||
|
||||
def test_mcp_vs_builtin_tools() -> None:
    """Test that built-in tools (python, container) are not parsed as MCP calls.

    Each built-in recipient on the commentary channel is expected to produce
    exactly one reasoning item rather than an ``McpCall``. Both recipients named
    in the summary line are exercised so the test matches what it promises.
    """
    for builtin_recipient in ("python", "container"):
        builtin_message = (
            Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
            .with_recipient(builtin_recipient)
            .with_channel("commentary")
        )

        builtin_items = parse_output_message(builtin_message)

        # Built-in tool output should be reasoning, not MCP.
        assert len(builtin_items) == 1, builtin_recipient
        assert not isinstance(builtin_items[0], McpCall), builtin_recipient
        assert builtin_items[0].type == "reasoning", builtin_recipient
|
||||
|
||||
|
||||
def test_parse_remaining_state_commentary_channel() -> None:
    """Exercise parse_remaining_state on the commentary channel.

    Three recipients are checked against a mocked parser: a ``functions.*``
    recipient, a non-built-in (MCP) recipient, and the built-in ``python``
    recipient.
    """
    from unittest.mock import Mock

    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

    # Scenario 1: functions.* recipient -> in-progress function tool call.
    function_parser = Mock(
        current_content='{"arg": "value"}',
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="functions.my_tool",
    )

    function_items = parse_remaining_state(function_parser)

    assert len(function_items) == 1
    function_item = function_items[0]
    assert not isinstance(function_item, McpCall)
    assert function_item.type == "function_call"
    assert function_item.name == "my_tool"
    assert function_item.status == "in_progress"

    # Scenario 2: non-built-in recipient -> in-progress MCP call.
    mcp_parser = Mock(
        current_content='{"path": "/tmp"}',
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="filesystem",
    )

    mcp_results = parse_remaining_state(mcp_parser)

    assert len(mcp_results) == 1
    mcp_item = mcp_results[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "filesystem"
    assert mcp_item.server_label == "filesystem"
    assert mcp_item.status == "in_progress"

    # Scenario 3: built-in tool (python) must NOT become an MCP call; it is
    # expected to fall through to the reasoning logic.
    builtin_parser = Mock(
        current_content="print('hello')",
        current_role=Role.ASSISTANT,
        current_channel="commentary",
        current_recipient="python",
    )

    builtin_results = parse_remaining_state(builtin_parser)

    assert len(builtin_results) == 1
    builtin_item = builtin_results[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
|
||||
|
||||
|
||||
def test_parse_remaining_state_analysis_channel() -> None:
    """Exercise parse_remaining_state on the analysis channel.

    Three recipients are checked against a mocked parser: a ``functions.*``
    recipient, a non-built-in (MCP) recipient, and the built-in ``container``
    recipient.
    """
    from unittest.mock import Mock

    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

    # Scenario 1: functions.* recipient -> in-progress function tool call.
    function_parser = Mock(
        current_content='{"arg": "value"}',
        current_role=Role.ASSISTANT,
        current_channel="analysis",
        current_recipient="functions.my_tool",
    )

    function_items = parse_remaining_state(function_parser)

    assert len(function_items) == 1
    function_item = function_items[0]
    assert not isinstance(function_item, McpCall)
    assert function_item.type == "function_call"
    assert function_item.name == "my_tool"
    assert function_item.status == "in_progress"

    # Scenario 2: non-built-in recipient -> in-progress MCP call.
    mcp_parser = Mock(
        current_content='{"query": "test"}',
        current_role=Role.ASSISTANT,
        current_channel="analysis",
        current_recipient="database",
    )

    mcp_results = parse_remaining_state(mcp_parser)

    assert len(mcp_results) == 1
    mcp_item = mcp_results[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "database"
    assert mcp_item.server_label == "database"
    assert mcp_item.status == "in_progress"

    # Scenario 3: built-in tool (container) must NOT become an MCP call; it is
    # expected to fall through to the reasoning logic.
    builtin_parser = Mock(
        current_content="docker run",
        current_role=Role.ASSISTANT,
        current_channel="analysis",
        current_recipient="container",
    )

    builtin_results = parse_remaining_state(builtin_parser)

    assert len(builtin_results) == 1
    builtin_item = builtin_results[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
|
||||
|
||||
@ -19,6 +19,7 @@ from openai.types.responses.response_function_web_search import (
|
||||
ActionSearch,
|
||||
ResponseFunctionWebSearch,
|
||||
)
|
||||
from openai.types.responses.response_output_item import McpCall
|
||||
from openai.types.responses.response_reasoning_item import (
|
||||
Content as ResponseReasoningTextContent,
|
||||
)
|
||||
@ -155,11 +156,7 @@ def get_developer_message(
|
||||
"web_search_preview",
|
||||
"code_interpreter",
|
||||
"container",
|
||||
"mcp",
|
||||
):
|
||||
# These are built-in tools that are added to the system message.
|
||||
# Adding in MCP for now until we support MCP tools executed
|
||||
# server side
|
||||
pass
|
||||
|
||||
elif tool.type == "function":
|
||||
@ -427,6 +424,44 @@ def _parse_final_message(message: Message) -> ResponseOutputItem:
|
||||
)
|
||||
|
||||
|
||||
def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
|
||||
"""
|
||||
Parse MCP recipient into (server_label, tool_name).
|
||||
|
||||
For dotted recipients like "repo_browser.list":
|
||||
- server_label: "repo_browser" (namespace/server)
|
||||
- tool_name: "list" (specific tool)
|
||||
|
||||
For simple recipients like "filesystem":
|
||||
- server_label: "filesystem"
|
||||
- tool_name: "filesystem"
|
||||
"""
|
||||
if "." in recipient:
|
||||
server_label = recipient.split(".")[0]
|
||||
tool_name = recipient.split(".")[-1]
|
||||
else:
|
||||
server_label = recipient
|
||||
tool_name = recipient
|
||||
return server_label, tool_name
|
||||
|
||||
|
||||
def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Convert a message addressed to an MCP tool into MCP call items.

    One ``McpCall`` is produced per entry in ``message.content``. Every item
    carries the server label and tool name derived from ``recipient``, a fresh
    ``mcp_``-prefixed id, and status ``"completed"``.

    Args:
        message: The message whose content parts hold the call arguments.
            Each part is read through its ``.text`` attribute.
        recipient: The recipient string, split by ``_parse_mcp_recipient``.

    Returns:
        The MCP call items, in the same order as ``message.content``.
    """
    label, tool = _parse_mcp_recipient(recipient)
    mcp_calls: list[ResponseOutputItem] = [
        McpCall(
            arguments=part.text,
            type="mcp_call",
            name=tool,
            server_label=label,
            id=f"mcp_{random_uuid()}",
            status="completed",
        )
        for part in message.content
    ]
    return mcp_calls
|
||||
|
||||
|
||||
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
|
||||
"""
|
||||
Parse a Harmony message into a list of output response items.
|
||||
@ -440,33 +475,34 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
|
||||
output_items: list[ResponseOutputItem] = []
|
||||
recipient = message.recipient
|
||||
|
||||
# Browser tool calls
|
||||
if recipient is not None and recipient.startswith("browser."):
|
||||
output_items.append(_parse_browser_tool_call(message, recipient))
|
||||
if recipient is not None:
|
||||
# Browser tool calls
|
||||
if recipient.startswith("browser."):
|
||||
output_items.append(_parse_browser_tool_call(message, recipient))
|
||||
|
||||
# Analysis channel (reasoning/chain-of-thought)
|
||||
# Function calls (should only happen on commentary channel)
|
||||
elif message.channel == "commentary" and recipient.startswith("functions."):
|
||||
output_items.extend(_parse_function_call(message, recipient))
|
||||
|
||||
# Built-in tools are treated as reasoning
|
||||
elif recipient.startswith(("python", "browser", "container")):
|
||||
# Built-in tool recipients (python/browser/container)
|
||||
# generate reasoning output
|
||||
output_items.extend(_parse_reasoning_content(message))
|
||||
|
||||
# All other recipients are MCP calls
|
||||
else:
|
||||
output_items.extend(_parse_mcp_call(message, recipient))
|
||||
|
||||
# No recipient - handle based on channel for non-tool messages
|
||||
elif message.channel == "analysis":
|
||||
output_items.extend(_parse_reasoning_content(message))
|
||||
|
||||
# Commentary channel
|
||||
elif message.channel == "commentary":
|
||||
# Function calls
|
||||
if recipient is not None and recipient.startswith("functions."):
|
||||
output_items.extend(_parse_function_call(message, recipient))
|
||||
# Per Harmony format, commentary channel can contain preambles to calling
|
||||
# multiple functions - explanatory text with no recipient
|
||||
output_items.extend(_parse_reasoning_content(message))
|
||||
|
||||
# Built-in tools on commentary channel are treated as reasoning for now
|
||||
elif (
|
||||
recipient is None # Preambles: explanatory text before tool calls
|
||||
or recipient.startswith(("python", "browser", "container"))
|
||||
):
|
||||
# Per Harmony format, commentary channel can contain preambles to calling
|
||||
# multiple functions - explanatory text with no recipient. Built-in tool
|
||||
# recipients (python/browser/container) also generate reasoning output.
|
||||
output_items.extend(_parse_reasoning_content(message))
|
||||
else:
|
||||
raise ValueError(f"Unknown recipient: {recipient}")
|
||||
|
||||
# Final output message
|
||||
elif message.channel == "final":
|
||||
output_items.append(_parse_final_message(message))
|
||||
|
||||
@ -485,20 +521,70 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
|
||||
if current_recipient is not None and current_recipient.startswith("browser."):
|
||||
return []
|
||||
|
||||
if parser.current_channel == "analysis":
|
||||
reasoning_item = ResponseReasoningItem(
|
||||
id=f"rs_{random_uuid()}",
|
||||
summary=[],
|
||||
type="reasoning",
|
||||
content=[
|
||||
ResponseReasoningTextContent(
|
||||
text=parser.current_content, type="reasoning_text"
|
||||
if current_recipient and parser.current_channel in ("commentary", "analysis"):
|
||||
if current_recipient.startswith("functions."):
|
||||
rid = random_uuid()
|
||||
return [
|
||||
ResponseFunctionToolCall(
|
||||
arguments=parser.current_content,
|
||||
call_id=f"call_{rid}",
|
||||
type="function_call",
|
||||
name=current_recipient.split(".")[-1],
|
||||
id=f"fc_{rid}",
|
||||
status="in_progress",
|
||||
)
|
||||
],
|
||||
status=None,
|
||||
)
|
||||
return [reasoning_item]
|
||||
elif parser.current_channel == "final":
|
||||
]
|
||||
# Built-in tools (python, browser, container) should be treated as reasoning
|
||||
elif not (
|
||||
current_recipient.startswith("python")
|
||||
or current_recipient.startswith("browser")
|
||||
or current_recipient.startswith("container")
|
||||
):
|
||||
# All other recipients are MCP calls
|
||||
rid = random_uuid()
|
||||
server_label, tool_name = _parse_mcp_recipient(current_recipient)
|
||||
return [
|
||||
McpCall(
|
||||
arguments=parser.current_content,
|
||||
type="mcp_call",
|
||||
name=tool_name,
|
||||
server_label=server_label,
|
||||
id=f"mcp_{rid}",
|
||||
status="in_progress",
|
||||
)
|
||||
]
|
||||
|
||||
if parser.current_channel == "commentary":
|
||||
return [
|
||||
ResponseReasoningItem(
|
||||
id=f"rs_{random_uuid()}",
|
||||
summary=[],
|
||||
type="reasoning",
|
||||
content=[
|
||||
ResponseReasoningTextContent(
|
||||
text=parser.current_content, type="reasoning_text"
|
||||
)
|
||||
],
|
||||
status=None,
|
||||
)
|
||||
]
|
||||
|
||||
if parser.current_channel == "analysis":
|
||||
return [
|
||||
ResponseReasoningItem(
|
||||
id=f"rs_{random_uuid()}",
|
||||
summary=[],
|
||||
type="reasoning",
|
||||
content=[
|
||||
ResponseReasoningTextContent(
|
||||
text=parser.current_content, type="reasoning_text"
|
||||
)
|
||||
],
|
||||
status=None,
|
||||
)
|
||||
]
|
||||
|
||||
if parser.current_channel == "final":
|
||||
output_text = ResponseOutputText(
|
||||
text=parser.current_content,
|
||||
annotations=[], # TODO
|
||||
@ -515,6 +601,7 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
|
||||
type="message",
|
||||
)
|
||||
return [text_item]
|
||||
|
||||
return []
|
||||
|
||||
|
||||
|
||||
@ -25,6 +25,10 @@ from openai.types.responses import (
|
||||
ResponseContentPartDoneEvent,
|
||||
ResponseFunctionToolCall,
|
||||
ResponseInputItemParam,
|
||||
ResponseMcpCallArgumentsDeltaEvent,
|
||||
ResponseMcpCallArgumentsDoneEvent,
|
||||
ResponseMcpCallCompletedEvent,
|
||||
ResponseMcpCallInProgressEvent,
|
||||
ResponseOutputItem,
|
||||
ResponseOutputItemAddedEvent,
|
||||
ResponseOutputItemDoneEvent,
|
||||
@ -1790,6 +1794,10 @@ StreamingResponsesResponse: TypeAlias = (
|
||||
| ResponseCodeInterpreterCallCodeDoneEvent
|
||||
| ResponseCodeInterpreterCallInterpretingEvent
|
||||
| ResponseCodeInterpreterCallCompletedEvent
|
||||
| ResponseMcpCallArgumentsDeltaEvent
|
||||
| ResponseMcpCallArgumentsDoneEvent
|
||||
| ResponseMcpCallInProgressEvent
|
||||
| ResponseMcpCallCompletedEvent
|
||||
)
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user