diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 06efb43ecb7b8..f546dbda7fef5 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -94,7 +94,7 @@ from vllm.entrypoints.openai.protocol import (
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.responses_utils import (
-    construct_chat_message_with_tool_call,
+    construct_input_messages,
     convert_tool_responses_to_completions_format,
     extract_tool_types,
 )
@@ -504,7 +504,12 @@ class OpenAIServingResponses(OpenAIServing):
                 for tool in request.tools
             ]
         # Construct the input messages.
-        messages = self._construct_input_messages(request, prev_response)
+        messages = construct_input_messages(
+            request_instructions=request.instructions,
+            request_input=request.input,
+            prev_msg=self.msg_store.get(prev_response.id) if prev_response else None,
+            prev_response_output=prev_response.output if prev_response else None,
+        )
         _, request_prompts, engine_prompts = await self._preprocess_chat(
             request,
             tokenizer,
@@ -869,47 +874,6 @@ class OpenAIServingResponses(OpenAIServing):
         output_items.extend(last_items)
         return output_items
 
-    def _construct_input_messages(
-        self,
-        request: ResponsesRequest,
-        prev_response: ResponsesResponse | None = None,
-    ) -> list[ChatCompletionMessageParam]:
-        messages: list[ChatCompletionMessageParam] = []
-        if request.instructions:
-            messages.append(
-                {
-                    "role": "system",
-                    "content": request.instructions,
-                }
-            )
-
-        # Prepend the conversation history.
-        if prev_response is not None:
-            # Add the previous messages.
-            prev_msg = self.msg_store[prev_response.id]
-            messages.extend(prev_msg)
-
-            # Add the previous output.
-            for output_item in prev_response.output:
-                # NOTE: We skip the reasoning output.
-                if isinstance(output_item, ResponseOutputMessage):
-                    for content in output_item.content:
-                        messages.append(
-                            {
-                                "role": "assistant",
-                                "content": content.text,
-                            }
-                        )
-
-        # Append the new input.
-        # Responses API supports simple text inputs without chat format.
-        if isinstance(request.input, str):
-            messages.append({"role": "user", "content": request.input})
-        else:
-            for item in request.input:
-                messages.append(construct_chat_message_with_tool_call(item))
-        return messages
-
     def _construct_harmony_system_input_message(
         self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str]
     ) -> OpenAIHarmonyMessage:
diff --git a/vllm/entrypoints/responses_utils.py b/vllm/entrypoints/responses_utils.py
index 912e8a690573d..b02c43c7f8246 100644
--- a/vllm/entrypoints/responses_utils.py
+++ b/vllm/entrypoints/responses_utils.py
@@ -9,7 +9,8 @@ from openai.types.chat import (
 from openai.types.chat.chat_completion_message_tool_call_param import (
     Function as FunctionCallTool,
 )
-from openai.types.responses import ResponseFunctionToolCall
+from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
+from openai.types.responses.response_output_message import ResponseOutputMessage
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from openai.types.responses.tool import Tool
 
@@ -20,6 +21,49 @@ from vllm.entrypoints.openai.protocol import (
 )
 
 
+def construct_input_messages(
+    *,
+    request_instructions: str | None = None,
+    request_input: str | list[ResponseInputOutputItem],
+    prev_msg: list[ChatCompletionMessageParam] | None = None,
+    prev_response_output: list[ResponseOutputItem] | None = None,
+):
+    messages: list[ChatCompletionMessageParam] = []
+    if request_instructions:
+        messages.append(
+            {
+                "role": "system",
+                "content": request_instructions,
+            }
+        )
+
+    # Prepend the conversation history.
+    if prev_msg is not None:
+        # Add the previous messages.
+        messages.extend(prev_msg)
+    if prev_response_output is not None:
+        # Add the previous output.
+        for output_item in prev_response_output:
+            # NOTE: We skip the reasoning output.
+            if isinstance(output_item, ResponseOutputMessage):
+                for content in output_item.content:
+                    messages.append(
+                        {
+                            "role": "assistant",
+                            "content": content.text,
+                        }
+                    )
+
+    # Append the new input.
+    # Responses API supports simple text inputs without chat format.
+    if isinstance(request_input, str):
+        messages.append({"role": "user", "content": request_input})
+    else:
+        for item in request_input:
+            messages.append(construct_chat_message_with_tool_call(item))
+    return messages
+
+
 def construct_chat_message_with_tool_call(
     item: ResponseInputOutputItem,
 ) -> ChatCompletionMessageParam:
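For reviewers: a minimal usage sketch of the extracted helper, showing the two call shapes the new keyword-only signature supports. This is illustrative only, not part of the diff; the ids and strings are made up, and the `ResponseOutputText` constructor is an assumption based on the openai-python types (only `ResponseOutputMessage` is imported in the diff itself).

```python
from openai.types.responses.response_output_message import ResponseOutputMessage
from openai.types.responses.response_output_text import ResponseOutputText

from vllm.entrypoints.responses_utils import construct_input_messages

# First turn: no history, plain-text input.
messages = construct_input_messages(
    request_instructions="You are a helpful assistant.",
    request_input="What is the capital of France?",
)
# -> [{"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": "What is the capital of France?"}]

# Follow-up turn: stored chat history plus the previous response's output,
# mirroring how serving_responses.py now passes the msg_store contents and
# prev_response.output.
prev_output = [
    ResponseOutputMessage(
        id="msg_0",  # hypothetical id
        content=[
            ResponseOutputText(text="Paris.", annotations=[], type="output_text")
        ],
        role="assistant",
        status="completed",
        type="message",
    )
]
messages = construct_input_messages(
    request_input="And of Germany?",
    prev_msg=[{"role": "user", "content": "What is the capital of France?"}],
    prev_response_output=prev_output,
)
# -> prior user turn, assistant "Paris.", then the new user turn; any
#    reasoning items in prev_response_output are skipped.
```

Making the helper module-level and keyword-only means it no longer touches `self.msg_store` directly; the caller now resolves the history (via `msg_store.get`) and passes it in, which keeps `responses_utils` free of serving-layer state.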