mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-05 02:35:40 +08:00
[Feature][Responses API] Stream Function Call - harmony (#24317)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
parent
720394de43
commit
df850c4912
@ -16,6 +16,22 @@ from ...utils import RemoteOpenAIServer
|
|||||||
|
|
||||||
MODEL_NAME = "openai/gpt-oss-20b"
|
MODEL_NAME = "openai/gpt-oss-20b"
|
||||||
|
|
||||||
|
GET_WEATHER_SCHEMA = {
|
||||||
|
"type": "function",
|
||||||
|
"name": "get_weather",
|
||||||
|
"description": "Get current temperature for provided coordinates in celsius.", # noqa
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"latitude": {"type": "number"},
|
||||||
|
"longitude": {"type": "number"},
|
||||||
|
},
|
||||||
|
"required": ["latitude", "longitude"],
|
||||||
|
"additionalProperties": False,
|
||||||
|
},
|
||||||
|
"strict": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def server():
|
def server():
|
||||||
@ -305,6 +321,54 @@ async def test_streaming_types(client: OpenAI, model_name: str):
|
|||||||
assert len(stack_of_event_types) == 0
|
assert len(stack_of_event_types) == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||||
|
async def test_function_calling_with_streaming_types(client: OpenAI, model_name: str):
|
||||||
|
# this links the "done" type with the "start" type
|
||||||
|
# so every "done" type should have a corresponding "start" type
|
||||||
|
# and every open block should be closed by the end of the stream
|
||||||
|
pairs_of_event_types = {
|
||||||
|
"response.completed": "response.created",
|
||||||
|
"response.output_item.done": "response.output_item.added",
|
||||||
|
"response.output_text.done": "response.output_text.delta",
|
||||||
|
"response.reasoning_text.done": "response.reasoning_text.delta",
|
||||||
|
"response.reasoning_part.done": "response.reasoning_part.added",
|
||||||
|
"response.function_call_arguments.done": "response.function_call_arguments.delta", # noqa
|
||||||
|
}
|
||||||
|
|
||||||
|
tools = [GET_WEATHER_SCHEMA]
|
||||||
|
input_list = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What's the weather like in Paris today?",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
stream_response = await client.responses.create(
|
||||||
|
model=model_name,
|
||||||
|
input=input_list,
|
||||||
|
tools=tools,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
stack_of_event_types = []
|
||||||
|
async for event in stream_response:
|
||||||
|
if event.type == "response.created":
|
||||||
|
stack_of_event_types.append(event.type)
|
||||||
|
elif event.type == "response.completed":
|
||||||
|
assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
|
||||||
|
stack_of_event_types.pop()
|
||||||
|
if event.type.endswith("added"):
|
||||||
|
stack_of_event_types.append(event.type)
|
||||||
|
elif event.type.endswith("delta"):
|
||||||
|
if stack_of_event_types[-1] == event.type:
|
||||||
|
continue
|
||||||
|
stack_of_event_types.append(event.type)
|
||||||
|
elif event.type.endswith("done"):
|
||||||
|
assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
|
||||||
|
stack_of_event_types.pop()
|
||||||
|
assert len(stack_of_event_types) == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||||
@pytest.mark.parametrize("background", [True, False])
|
@pytest.mark.parametrize("background", [True, False])
|
||||||
@ -483,23 +547,7 @@ def call_function(name, args):
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||||
async def test_function_calling(client: OpenAI, model_name: str):
|
async def test_function_calling(client: OpenAI, model_name: str):
|
||||||
tools = [
|
tools = [GET_WEATHER_SCHEMA]
|
||||||
{
|
|
||||||
"type": "function",
|
|
||||||
"name": "get_weather",
|
|
||||||
"description": "Get current temperature for provided coordinates in celsius.", # noqa
|
|
||||||
"parameters": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"latitude": {"type": "number"},
|
|
||||||
"longitude": {"type": "number"},
|
|
||||||
},
|
|
||||||
"required": ["latitude", "longitude"],
|
|
||||||
"additionalProperties": False,
|
|
||||||
},
|
|
||||||
"strict": True,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
response = await client.responses.create(
|
response = await client.responses.create(
|
||||||
model=model_name,
|
model=model_name,
|
||||||
@ -565,21 +613,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
|
|||||||
},
|
},
|
||||||
"strict": True,
|
"strict": True,
|
||||||
},
|
},
|
||||||
{
|
GET_WEATHER_SCHEMA,
|
||||||
"type": "function",
|
|
||||||
"name": "get_weather",
|
|
||||||
"description": "Get current temperature for provided coordinates in celsius.", # noqa
|
|
||||||
"parameters": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"latitude": {"type": "number"},
|
|
||||||
"longitude": {"type": "number"},
|
|
||||||
},
|
|
||||||
"required": ["latitude", "longitude"],
|
|
||||||
"additionalProperties": False,
|
|
||||||
},
|
|
||||||
"strict": True,
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
response = await client.responses.create(
|
response = await client.responses.create(
|
||||||
@ -643,23 +677,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||||
async def test_function_calling_required(client: OpenAI, model_name: str):
|
async def test_function_calling_required(client: OpenAI, model_name: str):
|
||||||
tools = [
|
tools = [GET_WEATHER_SCHEMA]
|
||||||
{
|
|
||||||
"type": "function",
|
|
||||||
"name": "get_weather",
|
|
||||||
"description": "Get current temperature for provided coordinates in celsius.", # noqa
|
|
||||||
"parameters": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"latitude": {"type": "number"},
|
|
||||||
"longitude": {"type": "number"},
|
|
||||||
},
|
|
||||||
"required": ["latitude", "longitude"],
|
|
||||||
"additionalProperties": False,
|
|
||||||
},
|
|
||||||
"strict": True,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
with pytest.raises(BadRequestError):
|
with pytest.raises(BadRequestError):
|
||||||
await client.responses.create(
|
await client.responses.create(
|
||||||
@ -689,23 +707,7 @@ async def test_system_message_with_tools(client: OpenAI, model_name: str):
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||||
async def test_function_calling_full_history(client: OpenAI, model_name: str):
|
async def test_function_calling_full_history(client: OpenAI, model_name: str):
|
||||||
tools = [
|
tools = [GET_WEATHER_SCHEMA]
|
||||||
{
|
|
||||||
"type": "function",
|
|
||||||
"name": "get_weather",
|
|
||||||
"description": "Get current temperature for provided coordinates in celsius.", # noqa
|
|
||||||
"parameters": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"latitude": {"type": "number"},
|
|
||||||
"longitude": {"type": "number"},
|
|
||||||
},
|
|
||||||
"required": ["latitude", "longitude"],
|
|
||||||
"additionalProperties": False,
|
|
||||||
},
|
|
||||||
"strict": True,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
input_messages = [
|
input_messages = [
|
||||||
{"role": "user", "content": "What's the weather like in Paris today?"}
|
{"role": "user", "content": "What's the weather like in Paris today?"}
|
||||||
@ -745,6 +747,74 @@ async def test_function_calling_full_history(client: OpenAI, model_name: str):
|
|||||||
assert response_2.output_text is not None
|
assert response_2.output_text is not None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||||
|
async def test_function_calling_with_stream(client: OpenAI, model_name: str):
|
||||||
|
tools = [GET_WEATHER_SCHEMA]
|
||||||
|
input_list = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What's the weather like in Paris today?",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
stream_response = await client.responses.create(
|
||||||
|
model=model_name,
|
||||||
|
input=input_list,
|
||||||
|
tools=tools,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
assert stream_response is not None
|
||||||
|
final_tool_calls = {}
|
||||||
|
final_tool_calls_named = {}
|
||||||
|
async for event in stream_response:
|
||||||
|
if event.type == "response.output_item.added":
|
||||||
|
if event.item.type != "function_call":
|
||||||
|
continue
|
||||||
|
final_tool_calls[event.output_index] = event.item
|
||||||
|
final_tool_calls_named[event.item.name] = event.item
|
||||||
|
elif event.type == "response.function_call_arguments.delta":
|
||||||
|
index = event.output_index
|
||||||
|
tool_call = final_tool_calls[index]
|
||||||
|
if tool_call:
|
||||||
|
tool_call.arguments += event.delta
|
||||||
|
final_tool_calls_named[tool_call.name] = tool_call
|
||||||
|
elif event.type == "response.function_call_arguments.done":
|
||||||
|
assert event.arguments == final_tool_calls_named[event.name].arguments
|
||||||
|
for tool_call in final_tool_calls.values():
|
||||||
|
if (
|
||||||
|
tool_call
|
||||||
|
and tool_call.type == "function_call"
|
||||||
|
and tool_call.name == "get_weather"
|
||||||
|
):
|
||||||
|
args = json.loads(tool_call.arguments)
|
||||||
|
result = call_function(tool_call.name, args)
|
||||||
|
input_list += [tool_call]
|
||||||
|
break
|
||||||
|
assert result is not None
|
||||||
|
response = await client.responses.create(
|
||||||
|
model=model_name,
|
||||||
|
input=input_list
|
||||||
|
+ [
|
||||||
|
{
|
||||||
|
"type": "function_call_output",
|
||||||
|
"call_id": tool_call.call_id,
|
||||||
|
"output": str(result),
|
||||||
|
}
|
||||||
|
],
|
||||||
|
tools=tools,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
assert response is not None
|
||||||
|
async for event in response:
|
||||||
|
# check that no function call events in the stream
|
||||||
|
assert event.type != "response.function_call_arguments.delta"
|
||||||
|
assert event.type != "response.function_call_arguments.done"
|
||||||
|
# check that the response contains output text
|
||||||
|
if event.type == "response.completed":
|
||||||
|
assert len(event.response.output) > 0
|
||||||
|
assert event.response.output_text is not None
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||||
async def test_output_messages_enabled(client: OpenAI, model_name: str, server):
|
async def test_output_messages_enabled(client: OpenAI, model_name: str, server):
|
||||||
|
|||||||
@ -23,6 +23,8 @@ from openai.types.responses import (
|
|||||||
ResponseCodeInterpreterToolCallParam,
|
ResponseCodeInterpreterToolCallParam,
|
||||||
ResponseContentPartAddedEvent,
|
ResponseContentPartAddedEvent,
|
||||||
ResponseContentPartDoneEvent,
|
ResponseContentPartDoneEvent,
|
||||||
|
ResponseFunctionCallArgumentsDeltaEvent,
|
||||||
|
ResponseFunctionCallArgumentsDoneEvent,
|
||||||
ResponseFunctionToolCall,
|
ResponseFunctionToolCall,
|
||||||
ResponseFunctionWebSearch,
|
ResponseFunctionWebSearch,
|
||||||
ResponseOutputItem,
|
ResponseOutputItem,
|
||||||
@ -927,6 +929,11 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
# to add the tool call request to prev_outputs so that the
|
# to add the tool call request to prev_outputs so that the
|
||||||
# parse_response_input can find the tool call request when
|
# parse_response_input can find the tool call request when
|
||||||
# parsing the tool call output.
|
# parsing the tool call output.
|
||||||
|
if (
|
||||||
|
isinstance(response_msg, dict)
|
||||||
|
and response_msg.get("type") == "function_call"
|
||||||
|
):
|
||||||
|
response_msg = ResponseFunctionToolCall.model_validate(response_msg)
|
||||||
if isinstance(response_msg, ResponseFunctionToolCall):
|
if isinstance(response_msg, ResponseFunctionToolCall):
|
||||||
prev_outputs.append(response_msg)
|
prev_outputs.append(response_msg)
|
||||||
return messages
|
return messages
|
||||||
@ -1398,19 +1405,48 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
current_output_index = 0
|
current_output_index = 0
|
||||||
current_item_id: str = ""
|
current_item_id: str = ""
|
||||||
sent_output_item_added = False
|
sent_output_item_added = False
|
||||||
|
is_first_function_call_delta = False
|
||||||
async for ctx in result_generator:
|
async for ctx in result_generator:
|
||||||
assert isinstance(ctx, StreamingHarmonyContext)
|
assert isinstance(ctx, StreamingHarmonyContext)
|
||||||
|
|
||||||
if ctx.is_expecting_start():
|
if ctx.is_expecting_start():
|
||||||
current_output_index += 1
|
current_output_index += 1
|
||||||
sent_output_item_added = False
|
sent_output_item_added = False
|
||||||
|
is_first_function_call_delta = False
|
||||||
if len(ctx.parser.messages) > 0:
|
if len(ctx.parser.messages) > 0:
|
||||||
previous_item = ctx.parser.messages[-1]
|
previous_item = ctx.parser.messages[-1]
|
||||||
if previous_item.recipient is not None:
|
if previous_item.recipient is not None:
|
||||||
# Deal with tool call here
|
# Deal with tool call
|
||||||
pass
|
if previous_item.recipient.startswith("functions."):
|
||||||
|
function_name = previous_item.recipient[len("functions.") :]
|
||||||
|
yield _increment_sequence_number_and_return(
|
||||||
|
ResponseFunctionCallArgumentsDoneEvent(
|
||||||
|
type="response.function_call_arguments.done",
|
||||||
|
arguments=previous_item.content[0].text,
|
||||||
|
name=function_name,
|
||||||
|
item_id=current_item_id,
|
||||||
|
output_index=current_output_index,
|
||||||
|
sequence_number=-1,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
function_call_item = ResponseFunctionToolCall(
|
||||||
|
type="function_call",
|
||||||
|
arguments=previous_item.content[0].text,
|
||||||
|
name=function_name,
|
||||||
|
item_id=current_item_id,
|
||||||
|
output_index=current_output_index,
|
||||||
|
sequence_number=-1,
|
||||||
|
call_id=f"fc_{random_uuid()}",
|
||||||
|
status="completed",
|
||||||
|
)
|
||||||
|
yield _increment_sequence_number_and_return(
|
||||||
|
ResponseOutputItemDoneEvent(
|
||||||
|
type="response.output_item.done",
|
||||||
|
sequence_number=-1,
|
||||||
|
output_index=current_output_index,
|
||||||
|
item=function_call_item,
|
||||||
|
)
|
||||||
|
)
|
||||||
elif previous_item.channel == "analysis":
|
elif previous_item.channel == "analysis":
|
||||||
content = ResponseReasoningTextContent(
|
content = ResponseReasoningTextContent(
|
||||||
text=previous_item.content[0].text,
|
text=previous_item.content[0].text,
|
||||||
@ -1766,6 +1802,43 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
# developer tools will be triggered on the commentary channel
|
||||||
|
# and recipient starts with "functions.TOOL_NAME"
|
||||||
|
if (
|
||||||
|
ctx.parser.current_channel == "commentary"
|
||||||
|
and ctx.parser.current_recipient
|
||||||
|
and ctx.parser.current_recipient.startswith("functions.")
|
||||||
|
):
|
||||||
|
if is_first_function_call_delta is False:
|
||||||
|
is_first_function_call_delta = True
|
||||||
|
fc_name = ctx.parser.current_recipient[len("functions.") :]
|
||||||
|
tool_call_item = ResponseFunctionToolCall(
|
||||||
|
name=fc_name,
|
||||||
|
type="function_call",
|
||||||
|
id=current_item_id,
|
||||||
|
call_id=f"call_{random_uuid()}",
|
||||||
|
arguments="",
|
||||||
|
status="in_progress",
|
||||||
|
)
|
||||||
|
current_item_id = f"fc_{random_uuid()}"
|
||||||
|
yield _increment_sequence_number_and_return(
|
||||||
|
ResponseOutputItemAddedEvent(
|
||||||
|
type="response.output_item.added",
|
||||||
|
sequence_number=-1,
|
||||||
|
output_index=current_output_index,
|
||||||
|
item=tool_call_item,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
yield _increment_sequence_number_and_return(
|
||||||
|
ResponseFunctionCallArgumentsDeltaEvent(
|
||||||
|
item_id=current_item_id,
|
||||||
|
delta=ctx.parser.last_content_delta,
|
||||||
|
output_index=current_output_index,
|
||||||
|
sequence_number=-1,
|
||||||
|
type="response.function_call_arguments.delta",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
async def responses_stream_generator(
|
async def responses_stream_generator(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user