mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-18 05:55:02 +08:00
[gpt-oss] Harmony changes with container tool support (#23386)
Signed-off-by: zhiweiz <zhiweiz@fb.com> Signed-off-by: Aaron Pham <contact@aarnphm.xyz> Signed-off-by: Lu Fang <30275821+houseroad@users.noreply.github.com> Co-authored-by: zhiweiz <zhiweiz@fb.com> Co-authored-by: Aaron Pham <contact@aarnphm.xyz> Co-authored-by: Simon Mo <simon.mo@hey.com> Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
This commit is contained in:
parent
955c624915
commit
170129eb28
@ -1,5 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
import asyncio
|
||||||
|
import contextlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
@ -57,9 +59,14 @@ class ConversationContext(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def init_tool_sessions(self, tool_server: Optional[ToolServer],
|
async def init_tool_sessions(self, tool_server: Optional[ToolServer],
|
||||||
exit_stack: AsyncExitStack) -> None:
|
exit_stack: AsyncExitStack,
|
||||||
|
request_id: str) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def cleanup_session(self) -> None:
|
||||||
|
raise NotImplementedError("Should not be called.")
|
||||||
|
|
||||||
|
|
||||||
class SimpleContext(ConversationContext):
|
class SimpleContext(ConversationContext):
|
||||||
|
|
||||||
@ -89,9 +96,13 @@ class SimpleContext(ConversationContext):
|
|||||||
raise NotImplementedError("Should not be called.")
|
raise NotImplementedError("Should not be called.")
|
||||||
|
|
||||||
async def init_tool_sessions(self, tool_server: Optional[ToolServer],
|
async def init_tool_sessions(self, tool_server: Optional[ToolServer],
|
||||||
exit_stack: AsyncExitStack) -> None:
|
exit_stack: AsyncExitStack,
|
||||||
|
request_id: str) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
async def cleanup_session(self) -> None:
|
||||||
|
raise NotImplementedError("Should not be called.")
|
||||||
|
|
||||||
|
|
||||||
class HarmonyContext(ConversationContext):
|
class HarmonyContext(ConversationContext):
|
||||||
|
|
||||||
@ -103,6 +114,7 @@ class HarmonyContext(ConversationContext):
|
|||||||
self._messages = messages
|
self._messages = messages
|
||||||
self.available_tools = available_tools
|
self.available_tools = available_tools
|
||||||
self._tool_sessions: dict[str, Union[ClientSession, Tool]] = {}
|
self._tool_sessions: dict[str, Union[ClientSession, Tool]] = {}
|
||||||
|
self.called_tools: set[str] = set()
|
||||||
|
|
||||||
self.parser = get_streamable_parser_for_assistant()
|
self.parser = get_streamable_parser_for_assistant()
|
||||||
self.num_init_messages = len(messages)
|
self.num_init_messages = len(messages)
|
||||||
@ -234,7 +246,8 @@ class HarmonyContext(ConversationContext):
|
|||||||
last_msg = self.messages[-1]
|
last_msg = self.messages[-1]
|
||||||
recipient = last_msg.recipient
|
recipient = last_msg.recipient
|
||||||
return recipient is not None and (recipient.startswith("browser.")
|
return recipient is not None and (recipient.startswith("browser.")
|
||||||
or recipient.startswith("python"))
|
or recipient.startswith("python") or
|
||||||
|
recipient.startswith("container."))
|
||||||
|
|
||||||
async def call_tool(self) -> list[Message]:
|
async def call_tool(self) -> list[Message]:
|
||||||
if not self.messages:
|
if not self.messages:
|
||||||
@ -248,6 +261,9 @@ class HarmonyContext(ConversationContext):
|
|||||||
elif recipient.startswith("python"):
|
elif recipient.startswith("python"):
|
||||||
return await self.call_python_tool(
|
return await self.call_python_tool(
|
||||||
self._tool_sessions["python"], last_msg)
|
self._tool_sessions["python"], last_msg)
|
||||||
|
elif recipient.startswith("container."):
|
||||||
|
return await self.call_container_tool(
|
||||||
|
self._tool_sessions["container"], last_msg)
|
||||||
raise ValueError("No tool call found")
|
raise ValueError("No tool call found")
|
||||||
|
|
||||||
def render_for_completion(self) -> list[int]:
|
def render_for_completion(self) -> list[int]:
|
||||||
@ -256,6 +272,7 @@ class HarmonyContext(ConversationContext):
|
|||||||
async def call_search_tool(self, tool_session: Union["ClientSession",
|
async def call_search_tool(self, tool_session: Union["ClientSession",
|
||||||
Tool],
|
Tool],
|
||||||
last_msg: Message) -> list[Message]:
|
last_msg: Message) -> list[Message]:
|
||||||
|
self.called_tools.add("browser")
|
||||||
if isinstance(tool_session, Tool):
|
if isinstance(tool_session, Tool):
|
||||||
return await tool_session.get_result(self)
|
return await tool_session.get_result(self)
|
||||||
tool_name = last_msg.recipient.split(".")[1]
|
tool_name = last_msg.recipient.split(".")[1]
|
||||||
@ -265,12 +282,16 @@ class HarmonyContext(ConversationContext):
|
|||||||
content = TextContent(text=result_str)
|
content = TextContent(text=result_str)
|
||||||
author = Author(role=Role.TOOL, name=last_msg.recipient)
|
author = Author(role=Role.TOOL, name=last_msg.recipient)
|
||||||
return [
|
return [
|
||||||
Message(author=author, content=[content], recipient=Role.ASSISTANT)
|
Message(author=author,
|
||||||
|
content=[content],
|
||||||
|
recipient=Role.ASSISTANT,
|
||||||
|
channel=last_msg.channel)
|
||||||
]
|
]
|
||||||
|
|
||||||
async def call_python_tool(self, tool_session: Union["ClientSession",
|
async def call_python_tool(self, tool_session: Union["ClientSession",
|
||||||
Tool],
|
Tool],
|
||||||
last_msg: Message) -> list[Message]:
|
last_msg: Message) -> list[Message]:
|
||||||
|
self.called_tools.add("python")
|
||||||
if isinstance(tool_session, Tool):
|
if isinstance(tool_session, Tool):
|
||||||
return await tool_session.get_result(self)
|
return await tool_session.get_result(self)
|
||||||
param = {
|
param = {
|
||||||
@ -290,13 +311,63 @@ class HarmonyContext(ConversationContext):
|
|||||||
]
|
]
|
||||||
|
|
||||||
async def init_tool_sessions(self, tool_server: Optional[ToolServer],
|
async def init_tool_sessions(self, tool_server: Optional[ToolServer],
|
||||||
exit_stack: AsyncExitStack) -> None:
|
exit_stack: AsyncExitStack,
|
||||||
|
request_id: str) -> None:
|
||||||
if tool_server:
|
if tool_server:
|
||||||
for tool_name in self.available_tools:
|
for tool_name in self.available_tools:
|
||||||
if tool_name not in self._tool_sessions:
|
if tool_name not in self._tool_sessions:
|
||||||
self._tool_sessions[
|
tool_session = await exit_stack.enter_async_context(
|
||||||
tool_name] = await exit_stack.enter_async_context(
|
tool_server.new_session(tool_name, request_id))
|
||||||
tool_server.new_session(tool_name))
|
self._tool_sessions[tool_name] = tool_session
|
||||||
|
exit_stack.push_async_exit(self.cleanup_session)
|
||||||
|
|
||||||
|
async def call_container_tool(self, tool_session: Union["ClientSession",
|
||||||
|
Tool],
|
||||||
|
last_msg: Message) -> list[Message]:
|
||||||
|
"""
|
||||||
|
Call container tool. Expect this to be run in a stateful docker
|
||||||
|
with command line terminal.
|
||||||
|
The official container tool would at least
|
||||||
|
expect the following format:
|
||||||
|
- for tool name: exec
|
||||||
|
- args:
|
||||||
|
{
|
||||||
|
"cmd":List[str] "command to execute",
|
||||||
|
"workdir":optional[str] "current working directory",
|
||||||
|
"env":optional[object/dict] "environment variables",
|
||||||
|
"session_name":optional[str] "session name",
|
||||||
|
"timeout":optional[int] "timeout in seconds",
|
||||||
|
"user":optional[str] "user name",
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
self.called_tools.add("container")
|
||||||
|
if isinstance(tool_session, Tool):
|
||||||
|
return await tool_session.get_result(self)
|
||||||
|
tool_name = last_msg.recipient.split(".")[1].split(" ")[0]
|
||||||
|
args = json.loads(last_msg.content[0].text)
|
||||||
|
result = await tool_session.call_tool(tool_name, args)
|
||||||
|
result_str = result.content[0].text
|
||||||
|
content = TextContent(text=result_str)
|
||||||
|
author = Author(role=Role.TOOL, name=last_msg.recipient)
|
||||||
|
return [
|
||||||
|
Message(author=author,
|
||||||
|
content=[content],
|
||||||
|
recipient=Role.ASSISTANT,
|
||||||
|
channel=last_msg.channel)
|
||||||
|
]
|
||||||
|
|
||||||
|
async def cleanup_session(self, *args, **kwargs) -> None:
|
||||||
|
"""Can be used as coro to used in __aexit__"""
|
||||||
|
|
||||||
|
async def cleanup_tool_session(tool_session):
|
||||||
|
if not isinstance(tool_session, Tool):
|
||||||
|
logger.info("Cleaning up tool session for %s",
|
||||||
|
tool_session._client_info)
|
||||||
|
with contextlib.suppress(Exception):
|
||||||
|
await tool_session.call_tool("cleanup_session", {})
|
||||||
|
|
||||||
|
await asyncio.gather(*(cleanup_tool_session(self._tool_sessions[tool])
|
||||||
|
for tool in self.called_tools))
|
||||||
|
|
||||||
|
|
||||||
class StreamingHarmonyContext(HarmonyContext):
|
class StreamingHarmonyContext(HarmonyContext):
|
||||||
|
|||||||
@ -16,11 +16,13 @@ from openai.types.responses.response_function_web_search import (
|
|||||||
from openai.types.responses.response_reasoning_item import (
|
from openai.types.responses.response_reasoning_item import (
|
||||||
Content as ResponseReasoningTextContent)
|
Content as ResponseReasoningTextContent)
|
||||||
from openai.types.responses.tool import Tool
|
from openai.types.responses.tool import Tool
|
||||||
from openai_harmony import (Author, Conversation, DeveloperContent,
|
from openai_harmony import (Author, ChannelConfig, Conversation,
|
||||||
HarmonyEncodingName, Message, ReasoningEffort,
|
DeveloperContent, HarmonyEncodingName, Message,
|
||||||
Role, StreamableParser, SystemContent, TextContent,
|
ReasoningEffort, Role, StreamableParser,
|
||||||
ToolDescription, load_harmony_encoding)
|
SystemContent, TextContent, ToolDescription,
|
||||||
|
load_harmony_encoding)
|
||||||
|
|
||||||
|
from vllm import envs
|
||||||
from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam,
|
from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam,
|
||||||
ResponseInputOutputItem)
|
ResponseInputOutputItem)
|
||||||
from vllm.utils import random_uuid
|
from vllm.utils import random_uuid
|
||||||
@ -33,6 +35,20 @@ REASONING_EFFORT = {
|
|||||||
|
|
||||||
_harmony_encoding = None
|
_harmony_encoding = None
|
||||||
|
|
||||||
|
# Builtin tools that should be included in the system message when
|
||||||
|
# they are available and requested by the user.
|
||||||
|
# Tool args are provided by MCP tool descriptions. Output
|
||||||
|
# of the tools are stringified.
|
||||||
|
BUILTIN_TOOLS = {
|
||||||
|
"web_search_preview",
|
||||||
|
"code_interpreter",
|
||||||
|
"container",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def has_custom_tools(tool_types: list[str]) -> bool:
|
||||||
|
return not set(tool_types).issubset(BUILTIN_TOOLS)
|
||||||
|
|
||||||
|
|
||||||
def get_encoding():
|
def get_encoding():
|
||||||
global _harmony_encoding
|
global _harmony_encoding
|
||||||
@ -48,10 +64,19 @@ def get_system_message(
|
|||||||
start_date: Optional[str] = None,
|
start_date: Optional[str] = None,
|
||||||
browser_description: Optional[str] = None,
|
browser_description: Optional[str] = None,
|
||||||
python_description: Optional[str] = None,
|
python_description: Optional[str] = None,
|
||||||
|
container_description: Optional[str] = None,
|
||||||
|
instructions: Optional[str] = None,
|
||||||
|
with_custom_tools: bool = False,
|
||||||
) -> Message:
|
) -> Message:
|
||||||
sys_msg_content = SystemContent.new()
|
sys_msg_content = SystemContent.new()
|
||||||
if model_identity is not None:
|
if model_identity is not None:
|
||||||
sys_msg_content = sys_msg_content.with_model_identity(model_identity)
|
sys_msg_content = sys_msg_content.with_model_identity(model_identity)
|
||||||
|
if (instructions is not None
|
||||||
|
and envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS):
|
||||||
|
current_identity = sys_msg_content.model_identity
|
||||||
|
new_identity = (f'{current_identity}\n{instructions}'
|
||||||
|
if current_identity else instructions)
|
||||||
|
sys_msg_content = sys_msg_content.with_model_identity(new_identity)
|
||||||
if reasoning_effort is not None:
|
if reasoning_effort is not None:
|
||||||
sys_msg_content = sys_msg_content.with_reasoning_effort(
|
sys_msg_content = sys_msg_content.with_reasoning_effort(
|
||||||
REASONING_EFFORT[reasoning_effort])
|
REASONING_EFFORT[reasoning_effort])
|
||||||
@ -63,6 +88,14 @@ def get_system_message(
|
|||||||
sys_msg_content = sys_msg_content.with_tools(browser_description)
|
sys_msg_content = sys_msg_content.with_tools(browser_description)
|
||||||
if python_description is not None:
|
if python_description is not None:
|
||||||
sys_msg_content = sys_msg_content.with_tools(python_description)
|
sys_msg_content = sys_msg_content.with_tools(python_description)
|
||||||
|
if container_description is not None:
|
||||||
|
sys_msg_content = sys_msg_content.with_tools(container_description)
|
||||||
|
if not with_custom_tools:
|
||||||
|
channel_config = sys_msg_content.channel_config
|
||||||
|
invalid_channel = "commentary"
|
||||||
|
new_config = ChannelConfig.require_channels(
|
||||||
|
[c for c in channel_config.valid_channels if c != invalid_channel])
|
||||||
|
sys_msg_content = sys_msg_content.with_channel_config(new_config)
|
||||||
sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content)
|
sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content)
|
||||||
return sys_msg
|
return sys_msg
|
||||||
|
|
||||||
@ -86,14 +119,17 @@ def get_developer_message(
|
|||||||
tools: Optional[list[Union[Tool, ChatCompletionToolsParam]]] = None,
|
tools: Optional[list[Union[Tool, ChatCompletionToolsParam]]] = None,
|
||||||
) -> Message:
|
) -> Message:
|
||||||
dev_msg_content = DeveloperContent.new()
|
dev_msg_content = DeveloperContent.new()
|
||||||
if instructions is not None:
|
if (instructions is not None
|
||||||
|
and not envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS):
|
||||||
dev_msg_content = dev_msg_content.with_instructions(instructions)
|
dev_msg_content = dev_msg_content.with_instructions(instructions)
|
||||||
if tools is not None:
|
if tools is not None:
|
||||||
function_tools: list[Union[Tool, ChatCompletionToolsParam]] = []
|
function_tools: list[Union[Tool, ChatCompletionToolsParam]] = []
|
||||||
for tool in tools:
|
for tool in tools:
|
||||||
if tool.type in ("web_search_preview", "code_interpreter"):
|
if tool.type in ("web_search_preview", "code_interpreter",
|
||||||
|
"container"):
|
||||||
# These are built-in tools that are added to the system message.
|
# These are built-in tools that are added to the system message.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
elif tool.type == "function":
|
elif tool.type == "function":
|
||||||
function_tools.append(tool)
|
function_tools.append(tool)
|
||||||
else:
|
else:
|
||||||
@ -136,6 +172,8 @@ def parse_response_input(
|
|||||||
TextContent(text=text_prefix + c["text"]) for c in content
|
TextContent(text=text_prefix + c["text"]) for c in content
|
||||||
]
|
]
|
||||||
msg = Message.from_role_and_contents(role, contents)
|
msg = Message.from_role_and_contents(role, contents)
|
||||||
|
if role == "assistant":
|
||||||
|
msg = msg.with_channel("final")
|
||||||
elif response_msg["type"] == "function_call_output":
|
elif response_msg["type"] == "function_call_output":
|
||||||
call_id = response_msg["call_id"]
|
call_id = response_msg["call_id"]
|
||||||
call_response: Optional[ResponseFunctionToolCall] = None
|
call_response: Optional[ResponseFunctionToolCall] = None
|
||||||
|
|||||||
@ -44,8 +44,9 @@ from vllm.entrypoints.context import (ConversationContext, HarmonyContext,
|
|||||||
SimpleContext, StreamingHarmonyContext)
|
SimpleContext, StreamingHarmonyContext)
|
||||||
from vllm.entrypoints.harmony_utils import (
|
from vllm.entrypoints.harmony_utils import (
|
||||||
get_developer_message, get_stop_tokens_for_assistant_actions,
|
get_developer_message, get_stop_tokens_for_assistant_actions,
|
||||||
get_system_message, get_user_message, parse_output_message,
|
get_system_message, get_user_message, has_custom_tools,
|
||||||
parse_remaining_state, parse_response_input, render_for_completion)
|
parse_output_message, parse_remaining_state, parse_response_input,
|
||||||
|
render_for_completion)
|
||||||
from vllm.entrypoints.logger import RequestLogger
|
from vllm.entrypoints.logger import RequestLogger
|
||||||
# yapf conflicts with isort for this block
|
# yapf conflicts with isort for this block
|
||||||
# yapf: disable
|
# yapf: disable
|
||||||
@ -266,6 +267,8 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
builtin_tool_list.append("browser")
|
builtin_tool_list.append("browser")
|
||||||
if self.tool_server.has_tool("python"):
|
if self.tool_server.has_tool("python"):
|
||||||
builtin_tool_list.append("python")
|
builtin_tool_list.append("python")
|
||||||
|
if self.tool_server.has_tool("container"):
|
||||||
|
builtin_tool_list.append("container")
|
||||||
|
|
||||||
if self.tool_server is not None:
|
if self.tool_server is not None:
|
||||||
available_tools = builtin_tool_list
|
available_tools = builtin_tool_list
|
||||||
@ -448,7 +451,8 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
|
|
||||||
async with AsyncExitStack() as exit_stack:
|
async with AsyncExitStack() as exit_stack:
|
||||||
try:
|
try:
|
||||||
await context.init_tool_sessions(self.tool_server, exit_stack)
|
await context.init_tool_sessions(self.tool_server, exit_stack,
|
||||||
|
request.request_id)
|
||||||
async for _ in result_generator:
|
async for _ in result_generator:
|
||||||
pass
|
pass
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
@ -710,13 +714,21 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
# New conversation.
|
# New conversation.
|
||||||
reasoning_effort = (request.reasoning.effort
|
reasoning_effort = (request.reasoning.effort
|
||||||
if request.reasoning else None)
|
if request.reasoning else None)
|
||||||
|
# Temporary: OpenAI types doesn't have container tool
|
||||||
|
# so we used MCP to cover that, up for change
|
||||||
tool_types = [tool.type for tool in request.tools]
|
tool_types = [tool.type for tool in request.tools]
|
||||||
|
if envs.VLLM_GPT_OSS_USE_CONTAINER_TOOL:
|
||||||
|
tool_types.append("container")
|
||||||
enable_browser = ("web_search_preview" in tool_types
|
enable_browser = ("web_search_preview" in tool_types
|
||||||
and self.tool_server is not None
|
and self.tool_server is not None
|
||||||
and self.tool_server.has_tool("browser"))
|
and self.tool_server.has_tool("browser"))
|
||||||
enable_code_interpreter = ("code_interpreter" in tool_types
|
enable_code_interpreter = ("code_interpreter" in tool_types
|
||||||
and self.tool_server is not None
|
and self.tool_server is not None
|
||||||
and self.tool_server.has_tool("python"))
|
and self.tool_server.has_tool("python"))
|
||||||
|
enable_container = ("container" in tool_types
|
||||||
|
and self.tool_server is not None
|
||||||
|
and self.tool_server.has_tool("container"))
|
||||||
|
with_custom_tools = has_custom_tools(tool_types)
|
||||||
sys_msg = get_system_message(
|
sys_msg = get_system_message(
|
||||||
reasoning_effort=reasoning_effort,
|
reasoning_effort=reasoning_effort,
|
||||||
browser_description=self.tool_server.get_tool_description(
|
browser_description=self.tool_server.get_tool_description(
|
||||||
@ -725,10 +737,16 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
python_description=self.tool_server.get_tool_description(
|
python_description=self.tool_server.get_tool_description(
|
||||||
"python") if enable_code_interpreter
|
"python") if enable_code_interpreter
|
||||||
and self.tool_server is not None else None,
|
and self.tool_server is not None else None,
|
||||||
|
container_description=self.tool_server.get_tool_description(
|
||||||
|
"container")
|
||||||
|
if enable_container and self.tool_server is not None else None,
|
||||||
|
instructions=request.instructions,
|
||||||
|
with_custom_tools=with_custom_tools,
|
||||||
)
|
)
|
||||||
messages.append(sys_msg)
|
messages.append(sys_msg)
|
||||||
dev_msg = get_developer_message(request.instructions,
|
if with_custom_tools:
|
||||||
request.tools)
|
dev_msg = get_developer_message(
|
||||||
|
instructions=request.instructions, tools=request.tools)
|
||||||
messages.append(dev_msg)
|
messages.append(dev_msg)
|
||||||
else:
|
else:
|
||||||
# Continue the previous conversation.
|
# Continue the previous conversation.
|
||||||
@ -1613,7 +1631,8 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
async with AsyncExitStack() as exit_stack:
|
async with AsyncExitStack() as exit_stack:
|
||||||
processer = None
|
processer = None
|
||||||
if self.use_harmony:
|
if self.use_harmony:
|
||||||
await context.init_tool_sessions(self.tool_server, exit_stack)
|
await context.init_tool_sessions(self.tool_server, exit_stack,
|
||||||
|
request.request_id)
|
||||||
processer = self._process_harmony_streaming_events
|
processer = self._process_harmony_streaming_events
|
||||||
else:
|
else:
|
||||||
processer = self._process_simple_streaming_events
|
processer = self._process_simple_streaming_events
|
||||||
|
|||||||
@ -86,7 +86,8 @@ class ToolServer(ABC):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def new_session(self, tool_name: str) -> AbstractAsyncContextManager[Any]:
|
def new_session(self, tool_name: str,
|
||||||
|
session_id: str) -> AbstractAsyncContextManager[Any]:
|
||||||
"""
|
"""
|
||||||
Create a session for the tool.
|
Create a session for the tool.
|
||||||
"""
|
"""
|
||||||
@ -124,7 +125,8 @@ class MCPToolServer(ToolServer):
|
|||||||
description=tool.description,
|
description=tool.description,
|
||||||
parameters=tool.inputSchema)
|
parameters=tool.inputSchema)
|
||||||
for tool in list_tools_response.tools
|
for tool in list_tools_response.tools
|
||||||
])
|
],
|
||||||
|
)
|
||||||
self.harmony_tool_descriptions[tool_from_mcp.name] = tool_from_mcp
|
self.harmony_tool_descriptions[tool_from_mcp.name] = tool_from_mcp
|
||||||
if tool_from_mcp.name not in self.urls:
|
if tool_from_mcp.name not in self.urls:
|
||||||
self.urls[tool_from_mcp.name] = url
|
self.urls[tool_from_mcp.name] = url
|
||||||
@ -142,13 +144,15 @@ class MCPToolServer(ToolServer):
|
|||||||
return self.harmony_tool_descriptions.get(tool_name)
|
return self.harmony_tool_descriptions.get(tool_name)
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def new_session(self, tool_name: str):
|
async def new_session(self, tool_name: str, session_id: str):
|
||||||
from mcp import ClientSession
|
from mcp import ClientSession
|
||||||
from mcp.client.sse import sse_client
|
from mcp.client.sse import sse_client
|
||||||
url = self.urls.get(tool_name)
|
url = self.urls.get(tool_name)
|
||||||
|
headers = {"x-session-id": session_id}
|
||||||
if not url:
|
if not url:
|
||||||
raise KeyError(f"Tool '{tool_name}' is not supported")
|
raise KeyError(f"Tool '{tool_name}' is not supported")
|
||||||
async with sse_client(url=url) as streams, ClientSession(
|
async with sse_client(url=url,
|
||||||
|
headers=headers) as streams, ClientSession(
|
||||||
*streams) as session:
|
*streams) as session:
|
||||||
await session.initialize()
|
await session.initialize()
|
||||||
yield session
|
yield session
|
||||||
@ -182,7 +186,7 @@ class DemoToolServer(ToolServer):
|
|||||||
raise ValueError(f"Unknown tool {tool_name}")
|
raise ValueError(f"Unknown tool {tool_name}")
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def new_session(self, tool_name: str):
|
async def new_session(self, tool_name: str, session_id: str):
|
||||||
if tool_name not in self.tools:
|
if tool_name not in self.tools:
|
||||||
raise KeyError(f"Tool '{tool_name}' is not supported")
|
raise KeyError(f"Tool '{tool_name}' is not supported")
|
||||||
yield self.tools[tool_name]
|
yield self.tools[tool_name]
|
||||||
|
|||||||
11
vllm/envs.py
11
vllm/envs.py
@ -168,6 +168,8 @@ if TYPE_CHECKING:
|
|||||||
VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False
|
VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False
|
||||||
VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None
|
VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None
|
||||||
VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False
|
VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False
|
||||||
|
VLLM_GPT_OSS_USE_CONTAINER_TOOL: bool = False
|
||||||
|
VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
|
||||||
VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
|
VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
|
||||||
|
|
||||||
|
|
||||||
@ -1201,6 +1203,15 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
"VLLM_TUNED_CONFIG_FOLDER":
|
"VLLM_TUNED_CONFIG_FOLDER":
|
||||||
lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),
|
lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),
|
||||||
|
|
||||||
|
# Allows vllm use container tool
|
||||||
|
"VLLM_GPT_OSS_USE_CONTAINER_TOOL":
|
||||||
|
lambda: bool(int(os.getenv("VLLM_GPT_OSS_USE_CONTAINER_TOOL", "0"))),
|
||||||
|
|
||||||
|
# Allows harmony instructions to be injected on system messages
|
||||||
|
"VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS":
|
||||||
|
lambda: bool(
|
||||||
|
int(os.getenv("VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS", "0"))),
|
||||||
|
|
||||||
# Add optional custom scopes for profiling, disable to avoid overheads
|
# Add optional custom scopes for profiling, disable to avoid overheads
|
||||||
"VLLM_CUSTOM_SCOPES_FOR_PROFILING":
|
"VLLM_CUSTOM_SCOPES_FOR_PROFILING":
|
||||||
lambda: bool(int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))),
|
lambda: bool(int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user