From 185d8ed44f382fc2b066e4d2eabd3c1b5fcbd8db Mon Sep 17 00:00:00 2001
From: Andrew Xia
Date: Tue, 7 Oct 2025 00:07:53 -0700
Subject: [PATCH] [responsesAPI][bugfix] serialize harmony messages (#26185)

Signed-off-by: Andrew Xia
Co-authored-by: Ye (Charlotte) Qi
---
 .../openai/test_response_api_with_harmony.py | 16 ++++++
 vllm/entrypoints/openai/protocol.py          | 49 +++++++++++++++++--
 vllm/entrypoints/openai/serving_responses.py |  2 +-
 3 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py
index fb0035de67c26..57d88f84d2519 100644
--- a/tests/entrypoints/openai/test_response_api_with_harmony.py
+++ b/tests/entrypoints/openai/test_response_api_with_harmony.py
@@ -8,6 +8,9 @@
 import pytest
 import pytest_asyncio
 import requests
 from openai import BadRequestError, NotFoundError, OpenAI
+from openai_harmony import (
+    Message,
+)
 
 from ...utils import RemoteOpenAIServer
@@ -326,6 +329,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
         ],
         stream=True,
         background=background,
+        extra_body={"enable_response_messages": True},
     )
 
     current_item_id = ""
@@ -334,6 +338,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
     events = []
     current_event_mode = None
     resp_id = None
+    checked_response_completed = False
     async for event in response:
         if event.type == "response.created":
             resp_id = event.response.id
@@ -346,6 +351,16 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
         ]:
             assert "input_messages" in event.response.model_extra
             assert "output_messages" in event.response.model_extra
+        if event.type == "response.completed":
+            # make sure the serialization of content works
+            for msg in event.response.model_extra["output_messages"]:
+                # make sure we can convert the messages back into harmony
+                Message.from_dict(msg)
+
+            for msg in event.response.model_extra["input_messages"]:
+                # make sure we can convert the messages back into harmony
+                Message.from_dict(msg)
+            checked_response_completed = True
 
         if current_event_mode != event.type:
             current_event_mode = event.type
@@ -390,6 +405,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
     assert len(events) > 0
     response_completed_event = events[-1]
     assert len(response_completed_event.response.output) > 0
+    assert checked_response_completed
 
     if background:
         starting_after = 5
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index a92e8372b304e..6ff7ceef48055 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -63,6 +63,7 @@ from pydantic import (
     Field,
     TypeAdapter,
     ValidationInfo,
+    field_serializer,
     field_validator,
     model_validator,
 )
@@ -2078,11 +2079,6 @@ class ResponsesResponse(OpenAIBaseModel):
     model: str
     object: Literal["response"] = "response"
     output: list[ResponseOutputItem]
-    # These are populated when enable_response_messages is set to True
-    # TODO: Currently an issue where content of harmony messages
-    # is not available when these are serialized. Metadata is available
-    input_messages: Optional[list[ChatCompletionMessageParam]] = None
-    output_messages: Optional[list[ChatCompletionMessageParam]] = None
     parallel_tool_calls: bool
     temperature: float
     tool_choice: ToolChoice
@@ -2102,6 +2098,49 @@
     usage: Optional[ResponseUsage] = None
     user: Optional[str] = None
 
+    # --8<-- [start:responses-extra-params]
+    # These are populated when enable_response_messages is set to True.
+    # NOTE: custom serialization is needed;
+    # see serialize_input_messages and serialize_output_messages
+    input_messages: Optional[list[ChatCompletionMessageParam]] = None
+    output_messages: Optional[list[ChatCompletionMessageParam]] = None
+    # --8<-- [end:responses-extra-params]
+
+    # NOTE: OpenAI harmony doesn't serialize TextContent properly.
+    # TODO: this fixes TextContent, but tools etc. still need verification:
+    # https://github.com/openai/harmony/issues/78
+    @field_serializer("output_messages", when_used="json")
+    def serialize_output_messages(self, msgs, _info):
+        if msgs:
+            serialized = []
+            for m in msgs:
+                if isinstance(m, dict):
+                    serialized.append(m)
+                elif hasattr(m, "to_dict"):
+                    serialized.append(m.to_dict())
+                else:
+                    # fall back to pydantic dump
+                    serialized.append(m.model_dump_json())
+            return serialized
+        return None
+
+    # NOTE: OpenAI harmony doesn't serialize TextContent properly; this fixes it:
+    # https://github.com/openai/harmony/issues/78
+    @field_serializer("input_messages", when_used="json")
+    def serialize_input_messages(self, msgs, _info):
+        if msgs:
+            serialized = []
+            for m in msgs:
+                if isinstance(m, dict):
+                    serialized.append(m)
+                elif hasattr(m, "to_dict"):
+                    serialized.append(m.to_dict())
+                else:
+                    # fall back to pydantic dump
+                    serialized.append(m.model_dump_json())
+            return serialized
+        return None
+
     @classmethod
     def from_request(
         cls,
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 1b25fd4eb27ed..998c279eea04b 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -1876,6 +1876,6 @@ class OpenAIServingResponses(OpenAIServing):
                 ResponseCompletedEvent(
                     type="response.completed",
                     sequence_number=-1,
-                    response=final_response.model_dump(),
+                    response=final_response,
                 )
             )
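
For context, a minimal client-side sketch of the round trip the new test exercises. This is not part of the patch: the base URL, API key, and model name are placeholder assumptions, and it presumes a running vLLM server with a harmony-capable model.

    from openai import OpenAI
    from openai_harmony import Message

    # Placeholders: point these at a running vLLM deployment.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    # enable_response_messages asks the server to attach the raw harmony
    # messages to the response as extra fields.
    response = client.responses.create(
        model="example-harmony-model",  # placeholder model name
        input="What is 13 * 24?",
        extra_body={"enable_response_messages": True},
    )

    # With the field_serializer fix above, the harmony messages survive
    # JSON serialization, so they can be converted back into Message objects.
    for msg in response.model_extra["output_messages"]:
        Message.from_dict(msg)
    for msg in response.model_extra["input_messages"]:
        Message.from_dict(msg)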