mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-20 20:27:03 +08:00
[responsesAPI] refactor construct_input_messages (#29359)
Signed-off-by: Andrew Xia <axia@fb.com> Co-authored-by: Andrew Xia <axia@fb.com>
This commit is contained in:
parent
32c40b95e0
commit
a685b47c57
@ -94,7 +94,7 @@ from vllm.entrypoints.openai.protocol import (
|
|||||||
from vllm.entrypoints.openai.serving_engine import OpenAIServing
|
from vllm.entrypoints.openai.serving_engine import OpenAIServing
|
||||||
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
|
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
|
||||||
from vllm.entrypoints.responses_utils import (
|
from vllm.entrypoints.responses_utils import (
|
||||||
construct_chat_message_with_tool_call,
|
construct_input_messages,
|
||||||
convert_tool_responses_to_completions_format,
|
convert_tool_responses_to_completions_format,
|
||||||
extract_tool_types,
|
extract_tool_types,
|
||||||
)
|
)
|
||||||
@ -504,7 +504,12 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
for tool in request.tools
|
for tool in request.tools
|
||||||
]
|
]
|
||||||
# Construct the input messages.
|
# Construct the input messages.
|
||||||
messages = self._construct_input_messages(request, prev_response)
|
messages = construct_input_messages(
|
||||||
|
request_instructions=request.instructions,
|
||||||
|
request_input=request.input,
|
||||||
|
prev_msg=self.msg_store.get(prev_response.id) if prev_response else None,
|
||||||
|
prev_response_output=prev_response.output if prev_response else None,
|
||||||
|
)
|
||||||
_, request_prompts, engine_prompts = await self._preprocess_chat(
|
_, request_prompts, engine_prompts = await self._preprocess_chat(
|
||||||
request,
|
request,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
@ -869,47 +874,6 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
output_items.extend(last_items)
|
output_items.extend(last_items)
|
||||||
return output_items
|
return output_items
|
||||||
|
|
||||||
def _construct_input_messages(
    self,
    request: ResponsesRequest,
    prev_response: ResponsesResponse | None = None,
) -> list[ChatCompletionMessageParam]:
    """Assemble the chat-format message list for a Responses API request.

    The result is ordered as: an optional system message built from
    ``request.instructions``; then, when ``prev_response`` is given, the
    messages stored for it in ``self.msg_store`` followed by one assistant
    message per content part of its ``ResponseOutputMessage`` outputs; and
    finally the new ``request.input``.

    Raises whatever ``self.msg_store[prev_response.id]`` raises when no
    entry exists for the previous response.
    """
    chat_history: list[ChatCompletionMessageParam] = []

    instructions = request.instructions
    if instructions:
        chat_history.append({"role": "system", "content": instructions})

    # Conversation history goes before the new input.
    if prev_response is not None:
        chat_history.extend(self.msg_store[prev_response.id])
        # Only ResponseOutputMessage items are replayed; reasoning output
        # (and any other item type) is skipped.
        chat_history.extend(
            {"role": "assistant", "content": part.text}
            for item in prev_response.output
            if isinstance(item, ResponseOutputMessage)
            for part in item.content
        )

    # The Responses API accepts a bare string as well as a list of items.
    new_input = request.input
    if isinstance(new_input, str):
        chat_history.append({"role": "user", "content": new_input})
        return chat_history

    chat_history.extend(
        construct_chat_message_with_tool_call(entry) for entry in new_input
    )
    return chat_history
|
|
||||||
|
|
||||||
def _construct_harmony_system_input_message(
|
def _construct_harmony_system_input_message(
|
||||||
self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str]
|
self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str]
|
||||||
) -> OpenAIHarmonyMessage:
|
) -> OpenAIHarmonyMessage:
|
||||||
|
|||||||
@ -9,7 +9,8 @@ from openai.types.chat import (
|
|||||||
from openai.types.chat.chat_completion_message_tool_call_param import (
|
from openai.types.chat.chat_completion_message_tool_call_param import (
|
||||||
Function as FunctionCallTool,
|
Function as FunctionCallTool,
|
||||||
)
|
)
|
||||||
from openai.types.responses import ResponseFunctionToolCall
|
from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
|
||||||
|
from openai.types.responses.response_output_message import ResponseOutputMessage
|
||||||
from openai.types.responses.response_reasoning_item import ResponseReasoningItem
|
from openai.types.responses.response_reasoning_item import ResponseReasoningItem
|
||||||
from openai.types.responses.tool import Tool
|
from openai.types.responses.tool import Tool
|
||||||
|
|
||||||
@ -20,6 +21,49 @@ from vllm.entrypoints.openai.protocol import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def construct_input_messages(
    *,
    request_instructions: str | None = None,
    request_input: str | list[ResponseInputOutputItem],
    prev_msg: list[ChatCompletionMessageParam] | None = None,
    prev_response_output: list[ResponseOutputItem] | None = None,
) -> list[ChatCompletionMessageParam]:
    """Build the chat-format message list for a Responses API request.

    All arguments are keyword-only. The returned list is ordered as:

    1. a ``system`` message holding ``request_instructions`` (if truthy),
    2. the messages in ``prev_msg`` (if not ``None``),
    3. one ``assistant`` message per content part of every
       ``ResponseOutputMessage`` in ``prev_response_output`` (if not
       ``None``); other item types, e.g. reasoning, are skipped,
    4. the new input: a single ``user`` message when ``request_input`` is a
       string, otherwise one message per item as produced by
       ``construct_chat_message_with_tool_call``.

    ``prev_msg`` and ``prev_response_output`` are handled independently, so
    either may be supplied without the other.

    Args:
        request_instructions: Optional system prompt text.
        request_input: The new user input, as plain text or a list of items.
        prev_msg: Messages of the previous turn to replay, if any.
        prev_response_output: Output items of the previous response, if any.

    Returns:
        A new list of chat messages; the input lists are not modified.
    """
    messages: list[ChatCompletionMessageParam] = []
    if request_instructions:
        messages.append(
            {
                "role": "system",
                "content": request_instructions,
            }
        )

    # Prepend the conversation history.
    if prev_msg is not None:
        # Add the previous messages.
        messages.extend(prev_msg)
    if prev_response_output is not None:
        # Add the previous output.
        for output_item in prev_response_output:
            # NOTE: We skip the reasoning output.
            if isinstance(output_item, ResponseOutputMessage):
                # NOTE(review): assumes every content part exposes `.text`;
                # confirm that non-text parts (e.g. refusals) cannot occur.
                for content in output_item.content:
                    messages.append(
                        {
                            "role": "assistant",
                            "content": content.text,
                        }
                    )

    # Append the new input.
    # Responses API supports simple text inputs without chat format.
    if isinstance(request_input, str):
        messages.append({"role": "user", "content": request_input})
    else:
        for item in request_input:
            messages.append(construct_chat_message_with_tool_call(item))
    return messages
|
||||||
|
|
||||||
|
|
||||||
def construct_chat_message_with_tool_call(
|
def construct_chat_message_with_tool_call(
|
||||||
item: ResponseInputOutputItem,
|
item: ResponseInputOutputItem,
|
||||||
) -> ChatCompletionMessageParam:
|
) -> ChatCompletionMessageParam:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user