diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py
index 57d88f84d2519..4251d06435c11 100644
--- a/tests/entrypoints/openai/test_response_api_with_harmony.py
+++ b/tests/entrypoints/openai/test_response_api_with_harmony.py
@@ -16,6 +16,22 @@ from ...utils import RemoteOpenAIServer
 
 MODEL_NAME = "openai/gpt-oss-20b"
 
+GET_WEATHER_SCHEMA = {
+    "type": "function",
+    "name": "get_weather",
+    "description": "Get current temperature for provided coordinates in celsius.",  # noqa
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "latitude": {"type": "number"},
+            "longitude": {"type": "number"},
+        },
+        "required": ["latitude", "longitude"],
+        "additionalProperties": False,
+    },
+    "strict": True,
+}
+
 
 @pytest.fixture(scope="module")
 def server():
@@ -305,6 +321,54 @@ async def test_streaming_types(client: OpenAI, model_name: str):
     assert len(stack_of_event_types) == 0
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_function_calling_with_streaming_types(client: OpenAI, model_name: str):
+    # this links the "done" type with the "start" type
+    # so every "done" type should have a corresponding "start" type
+    # and every open block should be closed by the end of the stream
+    pairs_of_event_types = {
+        "response.completed": "response.created",
+        "response.output_item.done": "response.output_item.added",
+        "response.output_text.done": "response.output_text.delta",
+        "response.reasoning_text.done": "response.reasoning_text.delta",
+        "response.reasoning_part.done": "response.reasoning_part.added",
+        "response.function_call_arguments.done": "response.function_call_arguments.delta",  # noqa
+    }
+
+    tools = [GET_WEATHER_SCHEMA]
+    input_list = [
+        {
+            "role": "user",
+            "content": "What's the weather like in Paris today?",
+        }
+    ]
+    stream_response = await client.responses.create(
+        model=model_name,
+        input=input_list,
+        tools=tools,
+        stream=True,
+    )
+
+    stack_of_event_types = []
+    async for event in stream_response:
+        if event.type == "response.created":
+            stack_of_event_types.append(event.type)
+        elif event.type == "response.completed":
+            assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
+            stack_of_event_types.pop()
+        if event.type.endswith("added"):
+            stack_of_event_types.append(event.type)
+        elif event.type.endswith("delta"):
+            if stack_of_event_types[-1] == event.type:
+                continue
+            stack_of_event_types.append(event.type)
+        elif event.type.endswith("done"):
+            assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
+            stack_of_event_types.pop()
+    assert len(stack_of_event_types) == 0
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 @pytest.mark.parametrize("background", [True, False])
@@ -483,23 +547,7 @@ def call_function(name, args):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]
 
     response = await client.responses.create(
         model=model_name,
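The pairing test above enforces a balanced, stack-like event grammar: every `done`-style event must close the most recent matching opener, and nothing may remain open when the stream ends. As a standalone illustration, here is a minimal sketch of that invariant run over a hypothetical event order for a single streamed function call (the order is illustrative, not captured from a live server):

```python
# Hypothetical event order for one streamed function call.
EXPECTED_ORDER = [
    "response.created",
    "response.output_item.added",
    "response.function_call_arguments.delta",
    "response.function_call_arguments.delta",
    "response.function_call_arguments.done",
    "response.output_item.done",
    "response.completed",
]

# Closers and the openers they must match, mirroring pairs_of_event_types.
PAIRS = {
    "response.completed": "response.created",
    "response.output_item.done": "response.output_item.added",
    "response.function_call_arguments.done": "response.function_call_arguments.delta",
}

stack: list[str] = []
for event_type in EXPECTED_ORDER:
    if event_type == "response.created" or event_type.endswith("added"):
        stack.append(event_type)
    elif event_type.endswith("delta"):
        if not stack or stack[-1] != event_type:
            stack.append(event_type)  # a run of deltas is pushed only once
    else:
        # "done"/"completed" events must close the matching opener
        assert stack.pop() == PAIRS[event_type]
assert not stack  # every opened block was closed
```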
@@ -565,21 +613,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
             },
             "strict": True,
         },
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        },
+        GET_WEATHER_SCHEMA,
     ]
 
     response = await client.responses.create(
@@ -643,23 +677,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling_required(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]
 
     with pytest.raises(BadRequestError):
         await client.responses.create(
@@ -689,23 +707,7 @@ async def test_system_message_with_tools(client: OpenAI, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling_full_history(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]
 
     input_messages = [
         {"role": "user", "content": "What's the weather like in Paris today?"}
     ]
@@ -745,6 +747,74 @@ async def test_function_calling_full_history(client: OpenAI, model_name: str):
     assert response_2.output_text is not None
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_function_calling_with_stream(client: OpenAI, model_name: str):
+    tools = [GET_WEATHER_SCHEMA]
+    input_list = [
+        {
+            "role": "user",
+            "content": "What's the weather like in Paris today?",
+        }
+    ]
+    stream_response = await client.responses.create(
+        model=model_name,
+        input=input_list,
+        tools=tools,
+        stream=True,
+    )
+    assert stream_response is not None
+    final_tool_calls = {}
+    final_tool_calls_named = {}
+    async for event in stream_response:
+        if event.type == "response.output_item.added":
+            if event.item.type != "function_call":
+                continue
+            final_tool_calls[event.output_index] = event.item
+            final_tool_calls_named[event.item.name] = event.item
+        elif event.type == "response.function_call_arguments.delta":
+            index = event.output_index
+            tool_call = final_tool_calls[index]
+            if tool_call:
+                tool_call.arguments += event.delta
+                final_tool_calls_named[tool_call.name] = tool_call
+        elif event.type == "response.function_call_arguments.done":
+            assert event.arguments == final_tool_calls_named[event.name].arguments
+    for tool_call in final_tool_calls.values():
+        if (
+            tool_call
+            and tool_call.type == "function_call"
+            and tool_call.name == "get_weather"
+        ):
+            args = json.loads(tool_call.arguments)
+            result = call_function(tool_call.name, args)
+            input_list += [tool_call]
+            break
+    assert result is not None
+    response = await client.responses.create(
+        model=model_name,
+        input=input_list
+        + [
+            {
+                "type": "function_call_output",
+                "call_id": tool_call.call_id,
+                "output": str(result),
+            }
+        ],
+        tools=tools,
+        stream=True,
+    )
+    assert response is not None
+    async for event in response:
+        # check that no function call events appear in the stream
+        assert event.type != "response.function_call_arguments.delta"
+        assert event.type != "response.function_call_arguments.done"
+        # check that the response contains output text
+        if event.type == "response.completed":
+            assert len(event.response.output) > 0
+            assert event.response.output_text is not None
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_output_messages_enabled(client: OpenAI, model_name: str, server):
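The server-side half of the change follows. Two paths matter: replayed history and live streaming. On the replay path, a prior `function_call` item sent back by the client arrives as a plain dict rather than a typed model, so without revalidation it would fall through the `isinstance(response_msg, ResponseFunctionToolCall)` check and never be recorded in `prev_outputs`, leaving the matching `function_call_output` orphaned. A minimal sketch of that revalidation step, with hypothetical field values:

```python
from openai.types.responses import ResponseFunctionToolCall

# A function_call replayed by the client as raw JSON (hypothetical values).
replayed = {
    "type": "function_call",
    "call_id": "call_abc123",
    "name": "get_weather",
    "arguments": '{"latitude": 48.85, "longitude": 2.35}',
}

# Revalidating into the typed model lets the existing isinstance check
# (and the later tool-call-output matching) see the replayed call.
msg = ResponseFunctionToolCall.model_validate(replayed)
assert isinstance(msg, ResponseFunctionToolCall)
```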
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 744df98a4278e..51e2856a5a9dd 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -23,6 +23,8 @@ from openai.types.responses import (
     ResponseCodeInterpreterToolCallParam,
     ResponseContentPartAddedEvent,
     ResponseContentPartDoneEvent,
+    ResponseFunctionCallArgumentsDeltaEvent,
+    ResponseFunctionCallArgumentsDoneEvent,
     ResponseFunctionToolCall,
     ResponseFunctionWebSearch,
     ResponseOutputItem,
@@ -927,6 +929,11 @@ class OpenAIServingResponses(OpenAIServing):
             # to add the tool call request to prev_outputs so that the
             # parse_response_input can find the tool call request when
             # parsing the tool call output.
+            if (
+                isinstance(response_msg, dict)
+                and response_msg.get("type") == "function_call"
+            ):
+                response_msg = ResponseFunctionToolCall.model_validate(response_msg)
             if isinstance(response_msg, ResponseFunctionToolCall):
                 prev_outputs.append(response_msg)
         return messages
@@ -1398,19 +1405,48 @@ class OpenAIServingResponses(OpenAIServing):
         current_output_index = 0
         current_item_id: str = ""
         sent_output_item_added = False
-
+        is_first_function_call_delta = False
         async for ctx in result_generator:
             assert isinstance(ctx, StreamingHarmonyContext)
 
             if ctx.is_expecting_start():
                 current_output_index += 1
                 sent_output_item_added = False
-
+                is_first_function_call_delta = False
                 if len(ctx.parser.messages) > 0:
                     previous_item = ctx.parser.messages[-1]
                     if previous_item.recipient is not None:
-                        # Deal with tool call here
-                        pass
+                        # Deal with tool call
+                        if previous_item.recipient.startswith("functions."):
+                            function_name = previous_item.recipient[len("functions.") :]
+                            yield _increment_sequence_number_and_return(
+                                ResponseFunctionCallArgumentsDoneEvent(
+                                    type="response.function_call_arguments.done",
+                                    arguments=previous_item.content[0].text,
+                                    name=function_name,
+                                    item_id=current_item_id,
+                                    output_index=current_output_index,
+                                    sequence_number=-1,
+                                )
+                            )
+                            function_call_item = ResponseFunctionToolCall(
+                                type="function_call",
+                                arguments=previous_item.content[0].text,
+                                name=function_name,
+                                item_id=current_item_id,
+                                output_index=current_output_index,
+                                sequence_number=-1,
+                                call_id=f"fc_{random_uuid()}",
+                                status="completed",
+                            )
+                            yield _increment_sequence_number_and_return(
+                                ResponseOutputItemDoneEvent(
+                                    type="response.output_item.done",
+                                    sequence_number=-1,
+                                    output_index=current_output_index,
+                                    item=function_call_item,
+                                )
+                            )
                     elif previous_item.channel == "analysis":
                         content = ResponseReasoningTextContent(
                             text=previous_item.content[0].text,
@@ -1766,6 +1802,43 @@ class OpenAIServingResponses(OpenAIServing):
                         ),
                     )
                 )
+            # developer tools will be triggered on the commentary channel,
+            # and the recipient starts with "functions.TOOL_NAME"
+            if (
+                ctx.parser.current_channel == "commentary"
+                and ctx.parser.current_recipient
+                and ctx.parser.current_recipient.startswith("functions.")
+            ):
+                if is_first_function_call_delta is False:
+                    is_first_function_call_delta = True
+                    fc_name = ctx.parser.current_recipient[len("functions.") :]
+                    tool_call_item = ResponseFunctionToolCall(
+                        name=fc_name,
+                        type="function_call",
+                        id=current_item_id,
+                        call_id=f"call_{random_uuid()}",
+                        arguments="",
+                        status="in_progress",
+                    )
+                    current_item_id = f"fc_{random_uuid()}"
+                    yield _increment_sequence_number_and_return(
+                        ResponseOutputItemAddedEvent(
+                            type="response.output_item.added",
+                            sequence_number=-1,
+                            output_index=current_output_index,
+                            item=tool_call_item,
+                        )
+                    )
+                else:
+                    yield _increment_sequence_number_and_return(
+                        ResponseFunctionCallArgumentsDeltaEvent(
+                            item_id=current_item_id,
+                            delta=ctx.parser.last_content_delta,
+                            output_index=current_output_index,
+                            sequence_number=-1,
+                            type="response.function_call_arguments.delta",
+                        )
+                    )
 
     async def responses_stream_generator(
         self,
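Together, the test and serving changes let clients consume tool-call arguments incrementally over the Responses API instead of waiting for the final response. A minimal consumer sketch, assuming a local vLLM server at `http://localhost:8000/v1` serving the model used in the tests; the tool schema duplicates `GET_WEATHER_SCHEMA` from the test file so the snippet is self-contained, and the `name` field read from the done event is the one populated by the server-side change above:

```python
import asyncio

from openai import AsyncOpenAI

GET_WEATHER_SCHEMA = {
    "type": "function",
    "name": "get_weather",
    "description": "Get current temperature for provided coordinates in celsius.",
    "parameters": {
        "type": "object",
        "properties": {
            "latitude": {"type": "number"},
            "longitude": {"type": "number"},
        },
        "required": ["latitude", "longitude"],
        "additionalProperties": False,
    },
    "strict": True,
}


async def main() -> None:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    stream = await client.responses.create(
        model="openai/gpt-oss-20b",
        input="What's the weather like in Paris today?",
        tools=[GET_WEATHER_SCHEMA],
        stream=True,
    )
    # Accumulate argument deltas per output item, then cross-check against
    # the full argument string carried by the corresponding done event.
    arguments: dict[int, str] = {}
    async for event in stream:
        if event.type == "response.function_call_arguments.delta":
            arguments[event.output_index] = (
                arguments.get(event.output_index, "") + event.delta
            )
        elif event.type == "response.function_call_arguments.done":
            assert arguments[event.output_index] == event.arguments
            print(f"{event.name}({event.arguments})")


asyncio.run(main())
```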