[Frontend] refactor harmony utils output message parsing (#29820)

Signed-off-by: Daniel Salib <danielsalib@meta.com>
This commit is contained in:
daniel-salib 2025-12-03 23:36:57 -08:00 committed by GitHub
parent 82a64b3d8f
commit 404fc4bfc0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -328,6 +328,105 @@ def render_for_completion(messages: list[Message]) -> list[int]:
return token_ids return token_ids
def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
    """Parse a browser tool call (search, open, find) into a web search item.

    Args:
        message: Harmony message whose single content entry carries the
            JSON-encoded tool arguments in its ``text`` attribute.
        recipient: Tool recipient; one of ``browser.search``,
            ``browser.open`` or ``browser.find``.

    Returns:
        A ``ResponseFunctionWebSearch`` item with status ``completed`` whose
        action matches the requested browser operation.

    Raises:
        ValueError: If the message does not hold exactly one content entry,
            if the arguments decode to something other than a JSON object,
            or if ``recipient`` is not a known browser action.
    """
    if len(message.content) != 1:
        raise ValueError("Invalid number of contents in browser message")
    content = message.content[0]

    try:
        browser_call = json.loads(content.text)
    except json.JSONDecodeError:
        # Malformed JSON is not fatal here: the raw text is surfaced in every
        # argument field so the user can see what the model produced.
        json_retry_output_message = (
            f"Invalid JSON args, caught and retried: {content.text}"
        )
        browser_call = {
            "query": json_retry_output_message,
            "url": json_retry_output_message,
            "pattern": json_retry_output_message,
        }
    else:
        # json.loads also accepts lists, strings, numbers and null; those
        # would otherwise fail below with an opaque AttributeError on `.get`.
        if not isinstance(browser_call, dict):
            raise ValueError(
                "Browser tool call arguments must be a JSON object, got "
                f"{type(browser_call).__name__}"
            )

    # TODO: translate the "cursor:" placeholders to real URLs.
    if recipient == "browser.search":
        action = ActionSearch(
            query=f"cursor:{browser_call.get('query', '')}", type="search"
        )
    elif recipient == "browser.open":
        action = ActionOpenPage(
            url=f"cursor:{browser_call.get('url', '')}", type="open_page"
        )
    elif recipient == "browser.find":
        action = ActionFind(
            pattern=browser_call.get("pattern", ""),
            url=f"cursor:{browser_call.get('url', '')}",
            type="find",
        )
    else:
        raise ValueError(f"Unknown browser action: {recipient}")

    return ResponseFunctionWebSearch(
        id=f"ws_{random_uuid()}",
        action=action,
        status="completed",
        type="web_search_call",
    )
def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Turn a function-call message into function tool call items.

    The function name is the part of ``recipient`` after its last dot. One
    item is produced per content entry, with that entry's text used as the
    call arguments.

    Args:
        message: Harmony message whose content entries hold the arguments.
        recipient: Dotted recipient string naming the target function.

    Returns:
        A list with one ``ResponseFunctionToolCall`` per content entry.
    """
    # Everything after the final "." (or the whole string if there is none).
    name = recipient.rpartition(".")[2]

    def build_call(arguments):
        # A single uuid is shared by the call id and the item id.
        uid = random_uuid()
        return ResponseFunctionToolCall(
            id=f"fc_{uid}",
            call_id=f"call_{uid}",
            name=name,
            arguments=arguments,
            type="function_call",
        )

    return [build_call(part.text) for part in message.content]
def _parse_reasoning_content(message: Message) -> list[ResponseOutputItem]:
    """Wrap every content entry of a message in its own reasoning item.

    Args:
        message: Harmony message whose content entries carry reasoning text.

    Returns:
        A list with one ``ResponseReasoningItem`` per content entry, each
        holding a single ``reasoning_text`` part, an empty summary and no
        status.
    """
    return [
        ResponseReasoningItem(
            id=f"rs_{random_uuid()}",
            type="reasoning",
            summary=[],
            content=[
                ResponseReasoningTextContent(type="reasoning_text", text=part.text)
            ],
            status=None,
        )
        for part in message.content
    ]
def _parse_final_message(message: Message) -> ResponseOutputItem:
    """Build the output message item for a final-channel message.

    Args:
        message: Harmony message; each content entry becomes one
            ``output_text`` part, and the author's role is copied over.

    Returns:
        A ``ResponseOutputMessage`` with status ``completed`` containing all
        text parts in their original order.
    """
    text_parts = [
        ResponseOutputText(
            type="output_text",
            text=part.text,
            annotations=[],  # TODO
            logprobs=None,  # TODO
        )
        for part in message.content
    ]
    return ResponseOutputMessage(
        id=f"msg_{random_uuid()}",
        type="message",
        role=message.author.role,
        status="completed",
        content=text_parts,
    )
def parse_output_message(message: Message) -> list[ResponseOutputItem]: def parse_output_message(message: Message) -> list[ResponseOutputItem]:
""" """
Parse a Harmony message into a list of output response items. Parse a Harmony message into a list of output response items.
@ -340,119 +439,38 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
output_items: list[ResponseOutputItem] = [] output_items: list[ResponseOutputItem] = []
recipient = message.recipient recipient = message.recipient
# Browser tool calls
if recipient is not None and recipient.startswith("browser."): if recipient is not None and recipient.startswith("browser."):
if len(message.content) != 1: output_items.append(_parse_browser_tool_call(message, recipient))
raise ValueError("Invalid number of contents in browser message")
content = message.content[0] # Analysis channel (reasoning/chain-of-thought)
# We do not need to check the VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY
# env variable since if it is not set, we are certain the json is valid
# The use of Actions for web search will be removed entirely in
# the future, so this is only necessary temporarily
try:
browser_call = json.loads(content.text)
except json.JSONDecodeError:
# If the content is not valid JSON, then it was
# caught and retried by vLLM, which means we
# need to make note of that so the user is aware
json_retry_output_message = (
f"Invalid JSON args, caught and retried: {content.text}"
)
browser_call = {
"query": json_retry_output_message,
"url": json_retry_output_message,
"pattern": json_retry_output_message,
}
# TODO: translate to url properly!
if recipient == "browser.search":
action = ActionSearch(
query=f"cursor:{browser_call.get('query', '')}", type="search"
)
elif recipient == "browser.open":
action = ActionOpenPage(
url=f"cursor:{browser_call.get('url', '')}", type="open_page"
)
elif recipient == "browser.find":
action = ActionFind(
pattern=browser_call["pattern"],
url=f"cursor:{browser_call.get('url', '')}",
type="find",
)
else:
raise ValueError(f"Unknown browser action: {recipient}")
web_search_item = ResponseFunctionWebSearch(
id=f"ws_{random_uuid()}",
action=action,
status="completed",
type="web_search_call",
)
output_items.append(web_search_item)
elif message.channel == "analysis": elif message.channel == "analysis":
for content in message.content: output_items.extend(_parse_reasoning_content(message))
reasoning_item = ResponseReasoningItem(
id=f"rs_{random_uuid()}", # Commentary channel
summary=[],
type="reasoning",
content=[
ResponseReasoningTextContent(
text=content.text, type="reasoning_text"
)
],
status=None,
)
output_items.append(reasoning_item)
elif message.channel == "commentary": elif message.channel == "commentary":
# Function calls
if recipient is not None and recipient.startswith("functions."): if recipient is not None and recipient.startswith("functions."):
function_name = recipient.split(".")[-1] output_items.extend(_parse_function_call(message, recipient))
for content in message.content:
random_id = random_uuid() # Built-in tools on commentary channel are treated as reasoning for now
response_item = ResponseFunctionToolCall(
arguments=content.text,
call_id=f"call_{random_id}",
type="function_call",
name=function_name,
id=f"fc_{random_id}",
)
output_items.append(response_item)
elif recipient is not None and ( elif recipient is not None and (
recipient.startswith("python") recipient.startswith("python")
or recipient.startswith("browser") or recipient.startswith("browser")
or recipient.startswith("container") or recipient.startswith("container")
): ):
for content in message.content: output_items.extend(_parse_reasoning_content(message))
reasoning_item = ResponseReasoningItem(
id=f"rs_{random_uuid()}",
summary=[],
type="reasoning",
content=[
ResponseReasoningTextContent(
text=content.text, type="reasoning_text"
)
],
status=None,
)
output_items.append(reasoning_item)
else: else:
raise ValueError(f"Unknown recipient: {recipient}") raise ValueError(f"Unknown recipient: {recipient}")
# Final output message
elif message.channel == "final": elif message.channel == "final":
contents = [] output_items.append(_parse_final_message(message))
for content in message.content:
output_text = ResponseOutputText(
text=content.text,
annotations=[], # TODO
type="output_text",
logprobs=None, # TODO
)
contents.append(output_text)
text_item = ResponseOutputMessage(
id=f"msg_{random_uuid()}",
content=contents,
role=message.author.role,
status="completed",
type="message",
)
output_items.append(text_item)
else: else:
raise ValueError(f"Unknown channel: {message.channel}") raise ValueError(f"Unknown channel: {message.channel}")
return output_items return output_items