[Frontend] Add tool filtering support to ToolServer (#29224)

Signed-off-by: Daniel Salib <danielsalib@meta.com> Co-authored-by: Chauncey <chaunceyjiang@gmail.com>
2026-06-11 03:29:10 +08:00 · 2025-12-01 03:03:57 -05:00 · 2025-12-01 03:03:57 -05:00 · 014ece97c7
commit 014ece97c7
parent 62de4f4257
5 changed files with 477 additions and 25 deletions
--- a/examples/online_serving/openai_responses_client_with_mcp_tools.py
+++ b/examples/online_serving/openai_responses_client_with_mcp_tools.py
@ -0,0 +1,184 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
 Example demonstrating MCP (Model Context Protocol) tools with the Responses API.
 This example shows how to use MCP tools with different allowed_tools configurations:
 1. No filter (allows all tools from the MCP server)
 2. Wildcard "*" (explicitly allows all tools)
 3. Specific tool names (filters to only those tools)
 Set up this example by starting a vLLM OpenAI-compatible server with MCP tools enabled.
 For example:
 vllm serve openai/gpt-oss-20b --enforce-eager --tool-server demo
 Environment variables:
 - VLLM_ENABLE_RESPONSES_API_STORE=1
 - VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS=code_interpreter,container
 - VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS=1
 """
 from openai import OpenAI
 from utils import get_first_model
 def example_no_filter():
    """Send an MCP request that carries no ``allowed_tools`` entry.

    Omitting ``allowed_tools`` is the "no filter" case of this example:
    all tools of the MCP server remain available.
    """
    banner = "=" * 60
    print(banner)
    print("Example 1: No allowed_tools filter (allows all tools)")
    print(banner)

    client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="empty")
    # Deliberately no "allowed_tools" key - all tools are available.
    mcp_tool = {
        "type": "mcp",
        "server_label": "code_interpreter",
        "server_url": "http://localhost:8888",
    }
    response = client.responses.create(
        model=get_first_model(client),
        input="Execute this code: print('Hello from Python!')",
        instructions="Use the Python tool to execute code.",
        tools=[mcp_tool],
    )

    print(f"Status: {response.status}")
    print(f"Output: {response.output_text}")
    print()
 def example_wildcard():
    """Send an MCP request whose ``allowed_tools`` is the wildcard ``["*"]``.

    The wildcard explicitly selects every tool of the MCP server.
    """
    banner = "=" * 60
    print(banner)
    print("Example 2: allowed_tools=['*'] (select all tools)")
    print(banner)

    client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="empty")
    mcp_tool = {
        "type": "mcp",
        "server_label": "code_interpreter",
        "server_url": "http://localhost:8888",
        # "*" explicitly allows all tools from this MCP server, which is
        # equivalent to not specifying allowed_tools.
        "allowed_tools": ["*"],
    }
    response = client.responses.create(
        model=get_first_model(client),
        input="Execute this code: print('Hello from Python with wildcard!')",
        instructions="Use the Python tool to execute code.",
        tools=[mcp_tool],
    )

    print(f"Status: {response.status}")
    print(f"Output: {response.output_text}")
    print()
 def example_specific_tools():
    """Send an MCP request restricted to an explicit list of tool names.

    Note: the request targets 'web_search_preview' (browser), which has the
    sub-tools 'search', 'open' and 'find'. The code_interpreter (python)
    has no sub-tools, so filtering does not apply to it.
    """
    banner = "=" * 60
    print(banner)
    print("Example 3: allowed_tools=['search'] (filter browser to specific tools)")
    print(banner)

    client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="empty")
    mcp_tool = {
        "type": "mcp",
        "server_label": "web_search_preview",
        "server_url": "http://localhost:8888",
        # Of the browser tools 'search', 'open' and 'find', only 'search'
        # is allowed; 'open' and 'find' are blocked.
        "allowed_tools": ["search"],
    }
    response = client.responses.create(
        model=get_first_model(client),
        input="Search for 'Python programming tutorials'",
        instructions="Use the browser tool to search.",
        tools=[mcp_tool],
    )

    print(f"Status: {response.status}")
    print(f"Output: {response.output_text}")
    print()
 def example_object_format():
    """Send an MCP request whose ``allowed_tools`` uses the object format."""
    banner = "=" * 60
    print(banner)
    print("Example 4: allowed_tools with object format")
    print(banner)

    client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="empty")
    # Object format: the names live under "tool_names", and further fields
    # such as "read_only" can sit next to them.
    # Browser has tools 'search', 'open', 'find': allow the first two and
    # block 'find'.
    tool_filter = {
        "tool_names": ["search", "open"],
        "read_only": False,
    }
    mcp_tool = {
        "type": "mcp",
        "server_label": "web_search_preview",
        "server_url": "http://localhost:8888",
        "allowed_tools": tool_filter,
    }
    response = client.responses.create(
        model=get_first_model(client),
        input="Search for 'machine learning' and open the first result",
        instructions="Use the browser tool.",
        tools=[mcp_tool],
    )

    print(f"Status: {response.status}")
    print(f"Output: {response.output_text}")
    print()
 def main():
    """Run the four examples in order, then print a summary of the modes."""
    rule = "=" * 60
    print("\n" + rule)
    print("MCP Tools with allowed_tools Examples")
    print(rule + "\n")

    # Same order as the numbered examples.
    examples = (
        example_no_filter,
        example_wildcard,
        example_specific_tools,
        example_object_format,
    )
    for run_example in examples:
        run_example()

    summary_lines = (
        "Summary:",
        "  - No filter or '*' → All tools available from server",
        "  - Specific list → Only those sub-tools available",
        "  - Object format → More control with tool_names field",
        "",
        "Note: allowed_tools filters SUB-TOOLS within an MCP server:",
        "  - code_interpreter (python): No sub-tools to filter",
        "  - web_search_preview (browser): Has 'search', 'open', 'find'",
    )
    print(rule)
    for line in summary_lines:
        print(line)
    print(rule)
 # Run the examples only when this file is executed directly as a script.
 if __name__ == "__main__":
    main()
--- a/tests/entrypoints/openai/test_response_api_mcp_tools.py
+++ b/tests/entrypoints/openai/test_response_api_mcp_tools.py
@ -4,6 +4,9 @@
 import pytest
 import pytest_asyncio
 from openai import OpenAI
 from openai_harmony import ToolDescription, ToolNamespaceConfig
 from vllm.entrypoints.tool_server import MCPToolServer
 from ...utils import RemoteOpenAIServer
@ -111,6 +114,48 @@ async def test_mcp_tool_env_flag_enabled(mcp_enabled_client: OpenAI, model_name:
        )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_mcp_tool_with_allowed_tools_star(
    mcp_enabled_client: OpenAI, model_name: str
 ):
    """Test MCP tool with allowed_tools=['*'] to select all available tools.
    This E2E test verifies that the "*" wildcard works end-to-end.
    See test_serving_responses.py for detailed unit tests of "*" normalization.
    """
    response = await mcp_enabled_client.responses.create(
        model=model_name,
        input=(
            "Execute the following code: "
            "import random; print(random.randint(1, 1000000))"
        ),
        instructions=(
            "You must use the Python tool to execute code. Never simulate execution."
        ),
        tools=[
            {
                "type": "mcp",
                "server_label": "code_interpreter",
                "server_url": "http://localhost:8888",
                # Using "*" to allow all tools from this MCP server
                "allowed_tools": ["*"],
            }
        ],
        extra_body={"enable_response_messages": True},
    )
    assert response is not None
    assert response.status == "completed"
    # Verify tool calls work with allowed_tools=["*"]
    tool_call_found = False
    for message in response.output_messages:
        recipient = message.get("recipient")
        if recipient and recipient.startswith("python"):
            tool_call_found = True
            break
    assert tool_call_found, "Should have found at least one Python tool call with '*'"
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, model_name: str):
@ -159,3 +204,58 @@ async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, model_nam
        assert message.get("author").get("role") != "developer", (
            "No developer messages should be present without a valid tool"
        )
 def test_get_tool_description():
    """Exercise the ``allowed_tools`` filtering of MCPToolServer.get_tool_description.

    Only ``None`` and explicit name lists are covered here; the wildcard "*"
    is normalized to ``None`` by _extract_allowed_tools_from_mcp_requests
    before it reaches this layer (see test_serving_responses.py).
    """
    pytest.importorskip("mcp")

    server = MCPToolServer()
    registered = [
        ToolDescription.new(
            name=name, description=summary, parameters={"type": "object"}
        )
        for name, summary in (
            ("tool1", "First"),
            ("tool2", "Second"),
            ("tool3", "Third"),
        )
    ]
    server.harmony_tool_descriptions = {
        "test_server": ToolNamespaceConfig(
            name="test_server", description="test", tools=registered
        )
    }

    def names_of(config):
        # Tool names in the order the returned namespace lists them.
        return [tool.name for tool in config.tools]

    # An unknown server label yields None.
    assert server.get_tool_description("nonexistent") is None

    # allowed_tools=None applies no filter: all three tools come back.
    unfiltered = server.get_tool_description("test_server", allowed_tools=None)
    assert len(unfiltered.tools) == 3

    # Two names keep exactly those two tools, in registration order.
    two_tools = server.get_tool_description(
        "test_server", allowed_tools=["tool1", "tool3"]
    )
    assert names_of(two_tools) == ["tool1", "tool3"]

    # A single name keeps a single tool.
    one_tool = server.get_tool_description("test_server", allowed_tools=["tool2"])
    assert names_of(one_tool) == ["tool2"]

    # A filter that matches nothing yields None ...
    unmatched = server.get_tool_description(
        "test_server", allowed_tools=["nonexistent"]
    )
    assert unmatched is None

    # ... and so does an empty filter list.
    assert server.get_tool_description("test_server", allowed_tools=[]) is None
--- a/tests/entrypoints/openai/test_serving_responses.py
+++ b/tests/entrypoints/openai/test_serving_responses.py
@ -17,6 +17,7 @@ from vllm.entrypoints.context import ConversationContext
 from vllm.entrypoints.openai.protocol import ErrorResponse, ResponsesRequest
 from vllm.entrypoints.openai.serving_responses import (
    OpenAIServingResponses,
    _extract_allowed_tools_from_mcp_requests,
    extract_tool_types,
 )
 from vllm.entrypoints.tool_server import ToolServer
@ -254,3 +255,98 @@ class TestValidateGeneratorInput:
        # Should return an ErrorResponse
        assert result is not None
        assert isinstance(result, ErrorResponse)
 class TestExtractAllowedToolsFromMcpRequests:
    """Unit tests for ``_extract_allowed_tools_from_mcp_requests``."""

    def test_extract_allowed_tools_basic_formats(self):
        """List format, object format and ``None`` map to the expected values."""
        from openai.types.responses.tool import McpAllowedToolsMcpToolFilter

        list_format = Mcp(
            type="mcp",
            server_label="server1",
            allowed_tools=["tool1", "tool2"],
        )
        object_format = Mcp(
            type="mcp",
            server_label="server2",
            allowed_tools=McpAllowedToolsMcpToolFilter(
                tool_names=["tool3", "tool4"]
            ),
        )
        no_filter = Mcp(
            type="mcp",
            server_label="server3",
            allowed_tools=None,
        )

        extracted = _extract_allowed_tools_from_mcp_requests(
            [list_format, object_format, no_filter]
        )

        expected = {
            "server1": ["tool1", "tool2"],
            "server2": ["tool3", "tool4"],
            "server3": None,
        }
        assert extracted == expected

    def test_extract_allowed_tools_star_normalization(self):
        """The '*' wildcard is normalized to ``None`` (select all tools).

        Covers a lone star in a list, a star mixed with other names, and a
        star inside the object format.
        """
        from openai.types.responses.tool import McpAllowedToolsMcpToolFilter

        star_variants = {
            "server1": ["*"],
            "server2": ["tool1", "*"],
            "server3": McpAllowedToolsMcpToolFilter(tool_names=["*"]),
        }
        requests = [
            Mcp(type="mcp", server_label=label, allowed_tools=allowed)
            for label, allowed in star_variants.items()
        ]

        extracted = _extract_allowed_tools_from_mcp_requests(requests)

        # Every variant collapses to None, i.e. all tools are allowed.
        assert extracted == {
            "server1": None,
            "server2": None,
            "server3": None,
        }

    def test_extract_allowed_tools_filters_non_mcp(self):
        """Entries that are not MCP tools do not appear in the result."""
        first_mcp = Mcp(
            type="mcp",
            server_label="server1",
            allowed_tools=["tool1"],
        )
        shell_tool = LocalShell(type="local_shell")  # not MCP: must be ignored
        second_mcp = Mcp(
            type="mcp",
            server_label="server2",
            allowed_tools=["tool2"],
        )

        extracted = _extract_allowed_tools_from_mcp_requests(
            [first_mcp, shell_tool, second_mcp]
        )

        # Only the two MCP server labels are present.
        assert extracted == {
            "server1": ["tool1"],
            "server2": ["tool2"],
        }
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@ -48,6 +48,7 @@ from openai.types.responses.response_output_text import Logprob, LogprobTopLogpr
 from openai.types.responses.response_reasoning_item import (
    Content as ResponseReasoningTextContent,
 )
 from openai.types.responses.tool import Mcp, Tool
 from openai_harmony import Message as OpenAIHarmonyMessage
 from vllm import envs
@ -111,6 +112,45 @@ from vllm.utils import random_uuid
 logger = init_logger(__name__)
 def _extract_allowed_tools_from_mcp_requests(
    tools: list[Tool],
 ) -> dict[str, list[str] | None]:
    """
    Extract allowed_tools mapping from MCP tool requests.
    Returns a dictionary mapping server_label to allowed_tools list.
    Handles both list format and McpAllowedToolsMcpToolFilter object format.
    Special handling:
    - If allowed_tools is None, returns None (allows all tools)
    - If allowed_tools contains "*", returns None (allows all tools)
    - Otherwise, returns the list of specific tool names
    This function can be reused for both harmony and non-harmony MCP calls.
    """
    allowed_tools_map: dict[str, list[str] | None] = {}
    for tool in tools:
        if not isinstance(tool, Mcp):
            continue
        # allowed_tools can be a list or an object with tool_names
        # Extract the actual list of tool names
        allowed_tools_val = None
        if tool.allowed_tools is not None:
            if isinstance(tool.allowed_tools, list):
                allowed_tools_val = tool.allowed_tools
            elif hasattr(tool.allowed_tools, "tool_names"):
                # It's an McpAllowedToolsMcpToolFilter object
                allowed_tools_val = tool.allowed_tools.tool_names
        # Normalize "*" to None (both mean "allow all tools")
        if allowed_tools_val is not None and "*" in allowed_tools_val:
            allowed_tools_val = None
        allowed_tools_map[tool.server_label] = allowed_tools_val
    return allowed_tools_map
 class OpenAIServingResponses(OpenAIServing):
    def __init__(
        self,
@ -878,38 +918,45 @@ class OpenAIServingResponses(OpenAIServing):
        self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str]
    ) -> OpenAIHarmonyMessage:
        reasoning_effort = request.reasoning.effort if request.reasoning else None
-        enable_browser = (
+
-            "web_search_preview" in tool_types
+        # Extract allowed_tools from MCP tool requests
        allowed_tools_map = _extract_allowed_tools_from_mcp_requests(request.tools)
        # Get filtered tool descriptions first.
        # If get_tool_description returns None (due to filtering), the tool is disabled.
        browser_description = (
            self.tool_server.get_tool_description(
                "browser", allowed_tools_map.get("web_search_preview")
            )
            if "web_search_preview" in tool_types
            and self.tool_server is not None
            and self.tool_server.has_tool("browser")
            else None
        )
-        enable_code_interpreter = (
+        python_description = (
-            "code_interpreter" in tool_types
+            self.tool_server.get_tool_description(
                "python", allowed_tools_map.get("code_interpreter")
            )
            if "code_interpreter" in tool_types
            and self.tool_server is not None
            and self.tool_server.has_tool("python")
            else None
        )
-        enable_container = (
+        container_description = (
-            "container" in tool_types
+            self.tool_server.get_tool_description(
                "container", allowed_tools_map.get("container")
            )
            if "container" in tool_types
            and self.tool_server is not None
            and self.tool_server.has_tool("container")
            else None
        )
        sys_msg = get_system_message(
            reasoning_effort=reasoning_effort,
-            browser_description=(
+            browser_description=browser_description,
-                self.tool_server.get_tool_description("browser")
+            python_description=python_description,
-                if enable_browser and self.tool_server is not None
+            container_description=container_description,
                else None
            ),
            python_description=(
                self.tool_server.get_tool_description("python")
                if enable_code_interpreter and self.tool_server is not None
                else None
            ),
            container_description=(
                self.tool_server.get_tool_description("container")
                if enable_container and self.tool_server is not None
                else None
            ),
            instructions=request.instructions,
            with_custom_tools=with_custom_tools,
        )
--- a/vllm/entrypoints/tool_server.py
+++ b/vllm/entrypoints/tool_server.py
@ -80,7 +80,9 @@ class ToolServer(ABC):
        pass
    @abstractmethod
-    def get_tool_description(self, tool_name: str) -> ToolNamespaceConfig | None:
+    def get_tool_description(
        self, tool_name: str, allowed_tools: list[str] | None = None
    ) -> ToolNamespaceConfig | None:
        """
        Return the tool description for the given tool name.
        If the tool is not supported, return None.
@ -147,8 +149,29 @@ class MCPToolServer(ToolServer):
    def has_tool(self, tool_name: str) -> bool:
        """Return whether ``tool_name`` is a key of ``harmony_tool_descriptions``."""
        return tool_name in self.harmony_tool_descriptions
-    def get_tool_description(self, tool_name: str):
+    def get_tool_description(
-        return self.harmony_tool_descriptions.get(tool_name)
+        self,
        server_label: str,
        allowed_tools: list[str] | None = None,
    ) -> ToolNamespaceConfig | None:
        cfg = self.harmony_tool_descriptions.get(server_label)
        if cfg is None:
            return None
        # No restrictions: all tools from this MCP server
        if allowed_tools is None:
            return cfg
        filtered = [t for t in cfg.tools if t.name in allowed_tools]
        if not filtered:
            return None
        return ToolNamespaceConfig(
            name=cfg.name,
            description=cfg.description,
            tools=filtered,
        )
    @asynccontextmanager
    async def new_session(
@ -190,7 +213,9 @@ class DemoToolServer(ToolServer):
    def has_tool(self, tool_name: str) -> bool:
        """Return whether ``tool_name`` is present in ``self.tools``."""
        return tool_name in self.tools
-    def get_tool_description(self, tool_name: str) -> ToolNamespaceConfig | None:
+    def get_tool_description(
        self, tool_name: str, allowed_tools: list[str] | None = None
    ) -> ToolNamespaceConfig | None:
        if tool_name not in self.tools:
            return None
        if tool_name == "browser":