[gpt-oss] Small bug fixes for frontend (#22512)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>

parent bbaf9e9cb1
commit ad344ef552
@@ -1,15 +1,20 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import json
 import logging
 from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Union

-from openai_harmony import Message, Role, StreamState
+from openai_harmony import Author, Message, Role, StreamState, TextContent

 from vllm.entrypoints.harmony_utils import (
     get_encoding, get_streamable_parser_for_assistant, render_for_completion)
 from vllm.entrypoints.tool import Tool
 from vllm.outputs import RequestOutput

+if TYPE_CHECKING:
+    from mcp.client import ClientSession
+
 logger = logging.getLogger(__name__)

@@ -71,6 +76,7 @@ class HarmonyContext(ConversationContext):
     def append_output(self, output) -> None:
         if isinstance(output, RequestOutput):
             output_token_ids = output.outputs[0].token_ids
+            self.parser = get_streamable_parser_for_assistant()
             for token_id in output_token_ids:
                 self.parser.process(token_id)
             output_msgs = self.parser.messages
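
Note: the added line re-creates the streamable parser for every RequestOutput, so parser state from an earlier turn can no longer leak into the next one. A minimal sketch of the failure mode, with ToyStreamParser as an illustrative stand-in for openai_harmony's parser (not vLLM code):

class ToyStreamParser:
    """Stand-in for get_streamable_parser_for_assistant(); illustrative only."""

    def __init__(self) -> None:
        self.messages: list[int] = []

    def process(self, token_id: int) -> None:
        self.messages.append(token_id)


def append_output(token_ids: list[int],
                  parser: ToyStreamParser | None = None) -> list[int]:
    # The fix: default to a fresh parser per output.
    parser = parser or ToyStreamParser()
    for token_id in token_ids:
        parser.process(token_id)
    return parser.messages


shared = ToyStreamParser()
print(append_output([1, 2], shared))  # [1, 2]
print(append_output([3], shared))     # [1, 2, 3] -- stale state (the bug)
print(append_output([3]))             # [3]       -- fresh parser (the fix)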
@@ -106,19 +112,41 @@ class HarmonyContext(ConversationContext):
     def render_for_completion(self) -> list[int]:
         return render_for_completion(self.messages)

-    async def call_search_tool(
-        self,
-        tool_session: Tool,
-        last_msg: Message,
-    ) -> list[Message]:
-        return await tool_session.get_result(self)
+    async def call_search_tool(self, tool_session: Union["ClientSession",
+                                                         Tool],
+                               last_msg: Message) -> list[Message]:
+        if isinstance(tool_session, Tool):
+            return await tool_session.get_result(self)
+        tool_name = last_msg.recipient.split(".")[1]
+        args = json.loads(last_msg.content[0].text)
+        result = await tool_session.call_tool(tool_name, args)
+        result_str = result.content[0].text
+        content = TextContent(text=result_str)
+        author = Author(role=Role.TOOL, name=last_msg.recipient)
+        return [
+            Message(author=author, content=[content], recipient=Role.ASSISTANT)
+        ]

-    async def call_python_tool(
-        self,
-        tool_session: Tool,
-        last_msg: Message,
-    ) -> list[Message]:
-        return await tool_session.get_result(self)
+    async def call_python_tool(self, tool_session: Union["ClientSession",
+                                                         Tool],
+                               last_msg: Message) -> list[Message]:
+        if isinstance(tool_session, Tool):
+            return await tool_session.get_result(self)
+        param = {
+            "code": last_msg.content[0].text,
+        }
+        result = await tool_session.call_tool("python", param)
+        result_str = result.content[0].text
+
+        content = TextContent(text=result_str)
+        author = Author(role=Role.TOOL, name="python")
+
+        return [
+            Message(author=author,
+                    content=[content],
+                    channel=last_msg.channel,
+                    recipient=Role.ASSISTANT)
+        ]


 class StreamingHarmonyContext(HarmonyContext):
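
Note: both methods now accept either a built-in Tool or an MCP ClientSession. Built-in tools keep the old get_result path; for MCP sessions, the tool name is parsed out of the message recipient ("browser.search" -> "search") and the message text is JSON-decoded into the call arguments. A runnable sketch of the MCP branch, with FakeSession as an illustrative stand-in for mcp.client.ClientSession:

import asyncio
import json


class FakeSession:
    """Stand-in for mcp.client.ClientSession; illustrative only."""

    async def call_tool(self, name: str, args: dict) -> str:
        return f"{name} called with {args}"


async def call_search_tool(session: FakeSession, recipient: str,
                           payload: str) -> str:
    # Mirrors the new MCP branch: parse the tool name from the recipient
    # and forward the JSON-decoded arguments.
    tool_name = recipient.split(".")[1]
    return await session.call_tool(tool_name, json.loads(payload))


print(asyncio.run(call_search_tool(FakeSession(), "browser.search",
                                   '{"query": "vllm"}')))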
@@ -19,8 +19,8 @@ from openai.types.chat.chat_completion_message import (
 # yapf: enable
 from openai.types.responses import (ResponseFunctionToolCall,
                                     ResponseInputItemParam, ResponseOutputItem,
-                                    ResponsePrompt, ResponseStatus,
-                                    ResponseTextConfig)
+                                    ResponsePrompt, ResponseReasoningItem,
+                                    ResponseStatus, ResponseTextConfig)
 from openai.types.responses.response import ToolChoice
 from openai.types.responses.tool import Tool
 from openai.types.shared import Metadata, Reasoning
@@ -239,6 +239,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],


 ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
+                                           ResponseReasoningItem,
                                            ResponseFunctionToolCall]

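
Note: widening the alias lets reasoning items emitted in one response be passed back as input items in a follow-up request. A self-contained sketch of the alias, using empty dataclasses as stand-ins for the OpenAI SDK types:

from dataclasses import dataclass
from typing import TypeAlias, Union


@dataclass
class ResponseInputItemParam:
    ...


@dataclass
class ResponseReasoningItem:
    ...


@dataclass
class ResponseFunctionToolCall:
    ...


ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
                                           ResponseReasoningItem,
                                           ResponseFunctionToolCall]

# Reasoning items from a previous turn now type-check as inputs.
items: list[ResponseInputOutputItem] = [ResponseReasoningItem(),
                                        ResponseFunctionToolCall()]
print(items)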
@@ -16,8 +16,7 @@ from fastapi import Request
 from openai import BaseModel
 # yapf conflicts with isort for this block
 # yapf: disable
-from openai.types.responses import (ResponseContentPartDoneEvent,
-                                    ResponseCreatedEvent,
+from openai.types.responses import (ResponseCreatedEvent,
                                     ResponseFunctionToolCall,
                                     ResponseInProgressEvent,
                                     ResponseOutputItem,
@@ -54,7 +53,7 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse,
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-from vllm.entrypoints.tool_server import ToolServer
+from vllm.entrypoints.tool_server import MCPToolServer, ToolServer
 from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
 from vllm.logger import init_logger
 from vllm.outputs import CompletionOutput
@@ -238,6 +237,15 @@ class OpenAIServingResponses(OpenAIServing):
         if raw_request:
             raw_request.state.request_metadata = request_metadata

+        if self.tool_server is not None and isinstance(
+                self.tool_server, MCPToolServer
+        ) and (request.background or request.stream) and request.tools and any(
+                tool.type in ["web_search_preview", "code_interpreter"]
+                for tool in request.tools):
+            return self.create_error_response(
+                "MCP tool server is not supported in background mode and "
+                "streaming mode")
+
         # Schedule the request and get the result generator.
         generators: list[AsyncGenerator[ConversationContext, None]] = []

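
Note: this guard rejects requests that combine the MCP tool server with background or streaming execution for the web_search_preview and code_interpreter tools. A small sketch of the predicate, with Req and ToolSpec as illustrative stand-ins for vLLM's request types:

from dataclasses import dataclass, field


@dataclass
class ToolSpec:
    type: str


@dataclass
class Req:
    background: bool = False
    stream: bool = False
    tools: list[ToolSpec] = field(default_factory=list)


def should_reject(req: Req, using_mcp_server: bool) -> bool:
    # Mirrors the diff's condition: MCP server + (background or stream)
    # + at least one of the two affected tool types.
    return (using_mcp_server and (req.background or req.stream)
            and any(t.type in ("web_search_preview", "code_interpreter")
                    for t in req.tools))


print(should_reject(Req(stream=True, tools=[ToolSpec("code_interpreter")]),
                    using_mcp_server=True))   # True  -> error response
print(should_reject(Req(tools=[ToolSpec("code_interpreter")]),
                    using_mcp_server=True))   # False -> request proceeds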
@@ -844,9 +852,13 @@ class OpenAIServingResponses(OpenAIServing):
                         type="reasoning",
                         content=[
                             ResponseReasoningTextContent(
-                                text=previous_item.content[0].text),
+                                text=previous_item.content[0].text,
+                                type="reasoning_text",
+                            ),
                         ],
                         status="completed",
+                        id=current_item_id,
+                        summary=[],
                     )
                     yield _send_event(
                         ResponseReasoningTextDoneEvent(
@@ -857,15 +869,6 @@ class OpenAIServingResponses(OpenAIServing):
                             content_index=current_content_index,
                             text=previous_item.content[0].text,
                         ))
-                    yield _send_event(
-                        ResponseContentPartDoneEvent(
-                            type="response.content_part.done",
-                            item_id=current_item_id,
-                            sequence_number=-1,
-                            output_index=current_output_index,
-                            content_index=current_content_index,
-                            part=reasoning_item,
-                        ))
                     yield _send_event(
                         ResponseOutputItemDoneEvent(
                             type="response.output_item.done",
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import os
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional
+
+from openai_harmony import Message

 from vllm.logger import init_logger

@@ -70,7 +72,16 @@ class HarmonyPythonTool(Tool):
                 "gpt_oss is not installed, code interpreter is disabled")
             return

-        self.python_tool = PythonTool()
+        # NOTE (Chen): as of gpt-oss 0.0.2, there is a bug in _make_response
+        # and we do the following monkey patch to fix it.
+        class PatchedGptOssPythonTool(PythonTool):
+
+            def _make_response(self,
+                               output: str,
+                               channel: Optional[str] = None) -> Message:
+                return super()._make_response(output)
+
+        self.python_tool = PatchedGptOssPythonTool()
         logger.info_once("Code interpreter tool initialized")

     async def get_result(self, context: "ConversationContext") -> Any:
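
Note: rather than patching gpt_oss in place, the fix subclasses PythonTool so _make_response tolerates the extra channel keyword the caller passes. The same pattern in isolation, with ThirdPartyTool as an illustrative stand-in for gpt_oss's PythonTool:

from typing import Optional


class ThirdPartyTool:
    """Stand-in for the upstream class whose signature lags the caller."""

    def _make_response(self, output: str) -> str:
        return f"response:{output}"


class PatchedTool(ThirdPartyTool):

    def _make_response(self, output: str,
                       channel: Optional[str] = None) -> str:
        # Absorb the extra keyword, then delegate to the upstream method,
        # mirroring PatchedGptOssPythonTool in the diff.
        return super()._make_response(output)


print(PatchedTool()._make_response("42", channel="analysis"))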
@@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
 from contextlib import AbstractAsyncContextManager, asynccontextmanager
 from typing import TYPE_CHECKING, Any, Optional

-from openai_harmony import ToolNamespaceConfig
+from openai_harmony import ToolDescription, ToolNamespaceConfig

 from vllm.entrypoints.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
 from vllm.logger import init_logger
@@ -105,7 +105,6 @@ class MCPToolServer(ToolServer):
         self.harmony_tool_descriptions = {}

     async def add_tool_server(self, server_url: str):
-        from mcp.types import ToolDescription
         tool_urls = server_url.split(",")
         self.harmony_tool_descriptions = {}
         self.urls: dict[str, str] = {}
@@ -133,6 +132,8 @@ class MCPToolServer(ToolServer):
                 logger.warning(
                     "Tool %s already exists. Ignoring duplicate tool server %s",
                     tool_from_mcp.name, url)
+        logger.info("MCPToolServer initialized with tools: %s",
+                    list(self.harmony_tool_descriptions.keys()))

     def has_tool(self, tool_name: str):
         return tool_name in self.harmony_tool_descriptions