From ea38474ac564efdc09762ad066139b75cf68f924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mads=20Kildeg=C3=A5rd?= Date: Sat, 22 Nov 2025 10:58:22 +0100 Subject: [PATCH] [Frontend][Responses API] Multi-turn (with type: "output_text") support for non-harmony requests (#29175) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mads Kildegård --- vllm/entrypoints/chat_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index aaf8a3ae9d2dd..bf80856c1bbfc 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -1283,6 +1283,7 @@ MM_PARSER_MAP: dict[ "text": lambda part: _TextParser(part).get("text", None), "thinking": lambda part: _ThinkParser(part).get("thinking", None), "input_text": lambda part: _TextParser(part).get("text", None), + "output_text": lambda part: _TextParser(part).get("text", None), "input_image": lambda part: _ResponsesInputImageParser(part).get("image_url", None), "image_url": lambda part: _ImageParser(part).get("image_url", {}).get("url", None), "image_embeds": lambda part: _ImageEmbedsParser(part).get("image_embeds", None), @@ -1463,7 +1464,7 @@ def _parse_chat_message_content_part( ) return None - if part_type in ("text", "input_text", "refusal", "thinking"): + if part_type in ("text", "input_text", "output_text", "refusal", "thinking"): str_content = cast(str, content) if wrap_dicts: return {"type": "text", "text": str_content}