[BugFix][gpt-oss] Fix Chat Completion with Multiple Output Message (#23318)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
2026-01-29 16:27:14 +08:00 · 2025-08-21 10:31:11 -07:00 · 2025-08-21 10:31:11 -07:00 · 8a19303173
commit 8a19303173
parent 603fbbbce0
1 changed file with 5 additions and 9 deletions
--- a/vllm/entrypoints/harmony_utils.py
+++ b/vllm/entrypoints/harmony_utils.py
@@ -329,23 +329,19 @@ def parse_chat_output(
        token_ids: Sequence[int]) -> tuple[Optional[str], Optional[str], bool]:
    parser = parse_output_into_messages(token_ids)
    output_msgs = parser.messages
+    is_tool_call = False  # TODO: update this when tool call is supported
    if len(output_msgs) == 0:
        # The generation has stopped during reasoning.
-        is_tool_call = False
        reasoning_content = parser.current_content
        final_content = None
    elif len(output_msgs) == 1:
        # The generation has stopped during final message.
-        is_tool_call = False
        reasoning_content = output_msgs[0].content[0].text
        final_content = parser.current_content
    else:
-        if len(output_msgs) != 2:
-            raise ValueError(
-                "Expected 2 output messages (reasoning and final), "
-                f"but got {len(output_msgs)}.")
-        reasoning_msg, final_msg = output_msgs
-        reasoning_content = reasoning_msg.content[0].text
+        reasoning_msg = output_msgs[:-1]
+        final_msg = output_msgs[-1]
+        reasoning_content = "\n".join(
+            [msg.content[0].text for msg in reasoning_msg])
        final_content = final_msg.content[0].text
-        is_tool_call = final_msg.recipient is not None
    return reasoning_content, final_content, is_tool_call