[gpt-oss] Small bug fixes for frontend (#22512)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
Authored by Chen Zhang on 2025-08-11 22:04:38 -07:00, committed by GitHub
parent bbaf9e9cb1
commit ad344ef552
5 changed files with 76 additions and 32 deletions

View File

@@ -1,15 +1,20 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import json
 import logging
 from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Union

-from openai_harmony import Message, Role, StreamState
+from openai_harmony import Author, Message, Role, StreamState, TextContent

 from vllm.entrypoints.harmony_utils import (
     get_encoding, get_streamable_parser_for_assistant, render_for_completion)
 from vllm.entrypoints.tool import Tool
 from vllm.outputs import RequestOutput

+if TYPE_CHECKING:
+    from mcp.client import ClientSession
+
 logger = logging.getLogger(__name__)
@@ -71,6 +76,7 @@ class HarmonyContext(ConversationContext):
     def append_output(self, output) -> None:
         if isinstance(output, RequestOutput):
             output_token_ids = output.outputs[0].token_ids
+            self.parser = get_streamable_parser_for_assistant()
             for token_id in output_token_ids:
                 self.parser.process(token_id)
             output_msgs = self.parser.messages
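
Note: before this change, HarmonyContext reused one streaming parser across every append_output call, so parser.messages kept accumulating messages from earlier turns. A minimal sketch of the fixed per-turn behavior, using the same helper the diff imports:

from vllm.entrypoints.harmony_utils import get_streamable_parser_for_assistant

def parse_turn(output_token_ids: list[int]):
    # Fresh parser per engine output: its message list now covers only
    # this turn's tokens instead of everything parsed so far.
    parser = get_streamable_parser_for_assistant()
    for token_id in output_token_ids:
        parser.process(token_id)
    return parser.messages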
@@ -106,19 +112,41 @@ class HarmonyContext(ConversationContext):
     def render_for_completion(self) -> list[int]:
         return render_for_completion(self.messages)

-    async def call_search_tool(
-        self,
-        tool_session: Tool,
-        last_msg: Message,
-    ) -> list[Message]:
-        return await tool_session.get_result(self)
+    async def call_search_tool(self, tool_session: Union["ClientSession",
+                                                         Tool],
+                               last_msg: Message) -> list[Message]:
+        if isinstance(tool_session, Tool):
+            return await tool_session.get_result(self)
+        tool_name = last_msg.recipient.split(".")[1]
+        args = json.loads(last_msg.content[0].text)
+        result = await tool_session.call_tool(tool_name, args)
+        result_str = result.content[0].text
+        content = TextContent(text=result_str)
+        author = Author(role=Role.TOOL, name=last_msg.recipient)
+        return [
+            Message(author=author, content=[content], recipient=Role.ASSISTANT)
+        ]

-    async def call_python_tool(
-        self,
-        tool_session: Tool,
-        last_msg: Message,
-    ) -> list[Message]:
-        return await tool_session.get_result(self)
+    async def call_python_tool(self, tool_session: Union["ClientSession",
+                                                         Tool],
+                               last_msg: Message) -> list[Message]:
+        if isinstance(tool_session, Tool):
+            return await tool_session.get_result(self)
+        param = {
+            "code": last_msg.content[0].text,
+        }
+        result = await tool_session.call_tool("python", param)
+        result_str = result.content[0].text
+        content = TextContent(text=result_str)
+        author = Author(role=Role.TOOL, name="python")
+        return [
+            Message(author=author,
+                    content=[content],
+                    channel=last_msg.channel,
+                    recipient=Role.ASSISTANT)
+        ]


 class StreamingHarmonyContext(HarmonyContext):
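
Note: both hooks now dispatch on the session type. A built-in Tool keeps the old get_result path; an MCP ClientSession is driven through call_tool, with the call derived from the harmony message. A minimal sketch of that derivation (the example recipient and arguments are assumptions, not values from this commit):

import json

# e.g. last_msg.recipient == "browser.search" and the message body holds
# the JSON-encoded arguments for that tool.
recipient = "browser.search"
tool_name = recipient.split(".")[1]        # -> "search"
args = json.loads('{"query": "vLLM"}')     # -> {"query": "vLLM"}
# result = await tool_session.call_tool(tool_name, args)
# The result text is then wrapped back into a harmony Message with Role.TOOL.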

View File

@@ -19,8 +19,8 @@ from openai.types.chat.chat_completion_message import (
 # yapf: enable
 from openai.types.responses import (ResponseFunctionToolCall,
                                     ResponseInputItemParam, ResponseOutputItem,
-                                    ResponsePrompt, ResponseStatus,
-                                    ResponseTextConfig)
+                                    ResponsePrompt, ResponseReasoningItem,
+                                    ResponseStatus, ResponseTextConfig)
 from openai.types.responses.response import ToolChoice
 from openai.types.responses.tool import Tool
 from openai.types.shared import Metadata, Reasoning
@@ -239,6 +239,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
 ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
+                                           ResponseReasoningItem,
                                            ResponseFunctionToolCall]
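
Note: adding ResponseReasoningItem to the ResponseInputOutputItem alias lets a reasoning item emitted by one response be passed back as input to the next request. A sketch of the item shape this now admits (field names taken from the openai-python models; the id is made up):

from openai.types.responses import ResponseReasoningItem

item = ResponseReasoningItem.model_validate({
    "id": "rs_123",        # hypothetical item id
    "type": "reasoning",
    "summary": [],
})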

View File

@@ -16,8 +16,7 @@ from fastapi import Request
 from openai import BaseModel
 # yapf conflicts with isort for this block
 # yapf: disable
-from openai.types.responses import (ResponseContentPartDoneEvent,
-                                    ResponseCreatedEvent,
+from openai.types.responses import (ResponseCreatedEvent,
                                     ResponseFunctionToolCall,
                                     ResponseInProgressEvent,
                                     ResponseOutputItem,
@@ -54,7 +53,7 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse,
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-from vllm.entrypoints.tool_server import ToolServer
+from vllm.entrypoints.tool_server import MCPToolServer, ToolServer
 from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
 from vllm.logger import init_logger
 from vllm.outputs import CompletionOutput
@@ -238,6 +237,15 @@ class OpenAIServingResponses(OpenAIServing):
         if raw_request:
             raw_request.state.request_metadata = request_metadata

+        if self.tool_server is not None and isinstance(
+                self.tool_server, MCPToolServer
+        ) and (request.background or request.stream) and request.tools and any(
+                tool.type in ["web_search_preview", "code_interpreter"]
+                for tool in request.tools):
+            return self.create_error_response(
+                "MCP tool server is not supported in background mode and "
+                "streaming mode")
+
         # Schedule the request and get the result generator.
         generators: list[AsyncGenerator[ConversationContext, None]] = []
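
Note: this guard fails fast instead of erroring mid-stream. A hypothetical client call it now rejects, assuming a vLLM server that was launched with an MCP tool server attached (server URL and model name are illustrative):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
# Streaming (or background) plus an MCP-served builtin tool now yields an
# error response up front rather than failing after the stream has started.
stream = client.responses.create(
    model="openai/gpt-oss-120b",
    input="Search for the latest vLLM release notes.",
    tools=[{"type": "web_search_preview"}],
    stream=True,
)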
@@ -844,9 +852,13 @@ class OpenAIServingResponses(OpenAIServing):
                     type="reasoning",
                     content=[
                         ResponseReasoningTextContent(
-                            text=previous_item.content[0].text),
+                            text=previous_item.content[0].text,
+                            type="reasoning_text",
+                        ),
                     ],
                     status="completed",
+                    id=current_item_id,
+                    summary=[],
                 )
                 yield _send_event(
                     ResponseReasoningTextDoneEvent(
@@ -857,15 +869,6 @@ class OpenAIServingResponses(OpenAIServing):
                         content_index=current_content_index,
                         text=previous_item.content[0].text,
                     ))
-                yield _send_event(
-                    ResponseContentPartDoneEvent(
-                        type="response.content_part.done",
-                        item_id=current_item_id,
-                        sequence_number=-1,
-                        output_index=current_output_index,
-                        content_index=current_content_index,
-                        part=reasoning_item,
-                    ))
                 yield _send_event(
                     ResponseOutputItemDoneEvent(
                         type="response.output_item.done",

View File

@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import os
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional
+
+from openai_harmony import Message

 from vllm.logger import init_logger
@@ -70,7 +72,16 @@ class HarmonyPythonTool(Tool):
                 "gpt_oss is not installed, code interpreter is disabled")
             return

-        self.python_tool = PythonTool()
+        # NOTE (Chen): As of gpt-oss 0.0.2, there is a bug in _make_response,
+        # so we apply the following monkey patch to fix it.
+        class PatchedGptOssPythonTool(PythonTool):
+
+            def _make_response(self,
+                               output: str,
+                               channel: Optional[str] = None) -> Message:
+                return super()._make_response(output)
+
+        self.python_tool = PatchedGptOssPythonTool()
         logger.info_once("Code interpreter tool initialized")

     async def get_result(self, context: "ConversationContext") -> Any:
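
Note: the subclass is an adapter-style monkey patch: it accepts the channel keyword that gpt-oss 0.0.2 mishandles, discards it, and delegates to the original implementation. The pattern in isolation (illustrative classes, not the gpt-oss ones):

from typing import Optional

class Base:
    def _make_response(self, output: str) -> str:
        return f"response: {output}"

class Patched(Base):
    # Accept and drop the extra keyword so callers that pass `channel`
    # no longer hit the buggy code path.
    def _make_response(self, output: str,
                       channel: Optional[str] = None) -> str:
        return super()._make_response(output)

print(Patched()._make_response("42", channel="final"))  # response: 42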

View File

@@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
 from contextlib import AbstractAsyncContextManager, asynccontextmanager
 from typing import TYPE_CHECKING, Any, Optional

-from openai_harmony import ToolNamespaceConfig
+from openai_harmony import ToolDescription, ToolNamespaceConfig

 from vllm.entrypoints.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
 from vllm.logger import init_logger
@@ -105,7 +105,6 @@ class MCPToolServer(ToolServer):
         self.harmony_tool_descriptions = {}

     async def add_tool_server(self, server_url: str):
-        from mcp.types import ToolDescription
         tool_urls = server_url.split(",")
         self.harmony_tool_descriptions = {}
         self.urls: dict[str, str] = {}
@@ -133,6 +132,8 @@ class MCPToolServer(ToolServer):
                 logger.warning(
                     "Tool %s already exists. Ignoring duplicate tool server %s",
                     tool_from_mcp.name, url)
+        logger.info("MCPToolServer initialized with tools: %s",
+                    list(self.harmony_tool_descriptions.keys()))

     def has_tool(self, tool_name: str):
         return tool_name in self.harmony_tool_descriptions
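
Note: ToolDescription lives in openai_harmony, not mcp.types, hence the import move; the new log line also makes the discovered tool set visible at startup. Hypothetical wiring (constructor usage and URLs are assumptions, and real MCP servers must be reachable for this to run):

import asyncio
from vllm.entrypoints.tool_server import MCPToolServer

async def main() -> None:
    server = MCPToolServer()
    # Comma-separated list: one URL per MCP tool server.
    await server.add_tool_server("localhost:8001,localhost:8002")
    # Logs e.g.: MCPToolServer initialized with tools: ['browser', 'python']
    print(server.has_tool("browser"))

asyncio.run(main())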