[responsesAPI][bugfix] serialize harmony messages (#26185)
Signed-off-by: Andrew Xia <axia@meta.com>
Co-authored-by: Ye (Charlotte) Qi <yeq@meta.com>

commit 185d8ed44f
parent d9836d4517
@@ -8,6 +8,9 @@ import pytest
 import pytest_asyncio
 import requests
 from openai import BadRequestError, NotFoundError, OpenAI
+from openai_harmony import (
+    Message,
+)
 
 from ...utils import RemoteOpenAIServer
 
@@ -326,6 +329,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
         ],
         stream=True,
         background=background,
+        extra_body={"enable_response_messages": True},
     )
 
     current_item_id = ""
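
As context for the new flag: enable_response_messages is a vLLM-specific extension rather than part of the OpenAI spec, so a client passes it through the SDK's extra_body escape hatch. A minimal non-streaming sketch; the base URL, API key, and model name below are placeholders:

from openai import OpenAI

# assumes a local vLLM server; endpoint and model are illustrative
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
response = client.responses.create(
    model="openai/gpt-oss-20b",
    input="Hello!",
    extra_body={"enable_response_messages": True},
)

# the harmony messages come back as extra fields on the response object
print(response.model_extra["input_messages"])
print(response.model_extra["output_messages"])
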
@@ -334,6 +338,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
     events = []
     current_event_mode = None
     resp_id = None
+    checked_response_completed = False
     async for event in response:
         if event.type == "response.created":
             resp_id = event.response.id
@@ -346,6 +351,16 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
         ]:
             assert "input_messages" in event.response.model_extra
             assert "output_messages" in event.response.model_extra
+            if event.type == "response.completed":
+                # make sure the serialization of content works
+                for msg in event.response.model_extra["output_messages"]:
+                    # make sure we can convert the messages back into harmony
+                    Message.from_dict(msg)
+
+                for msg in event.response.model_extra["input_messages"]:
+                    # make sure we can convert the messages back into harmony
+                    Message.from_dict(msg)
+                checked_response_completed = True
 
         if current_event_mode != event.type:
             current_event_mode = event.type
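
The new assertions amount to a serialize/deserialize round trip over the wire format. The same check in isolation, as a sketch assuming openai_harmony's Message.from_role_and_content constructor (the test itself only relies on from_dict):

from openai_harmony import Message, Role

# build a message, serialize it the way the server now does, then rebuild it
msg = Message.from_role_and_content(Role.ASSISTANT, "hello world")
payload = msg.to_dict()                # what lands in output_messages
restored = Message.from_dict(payload)  # what the test verifies is possible
assert restored.to_dict() == payload
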
@@ -390,6 +405,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
     assert len(events) > 0
     response_completed_event = events[-1]
     assert len(response_completed_event.response.output) > 0
+    assert checked_response_completed
 
     if background:
         starting_after = 5
@@ -63,6 +63,7 @@ from pydantic import (
     Field,
     TypeAdapter,
     ValidationInfo,
+    field_serializer,
     field_validator,
     model_validator,
 )
@@ -2078,11 +2079,6 @@ class ResponsesResponse(OpenAIBaseModel):
     model: str
     object: Literal["response"] = "response"
     output: list[ResponseOutputItem]
-    # These are populated when enable_response_messages is set to True
-    # TODO: Currently an issue where content of harmony messages
-    # is not available when these are serialized. Metadata is available
-    input_messages: Optional[list[ChatCompletionMessageParam]] = None
-    output_messages: Optional[list[ChatCompletionMessageParam]] = None
     parallel_tool_calls: bool
     temperature: float
     tool_choice: ToolChoice
@@ -2102,6 +2098,49 @@ class ResponsesResponse(OpenAIBaseModel):
     usage: Optional[ResponseUsage] = None
     user: Optional[str] = None
 
+    # --8<-- [start:responses-extra-params]
+    # These are populated when enable_response_messages is set to True
+    # NOTE: custom serialization is needed,
+    # see serialize_input_messages and serialize_output_messages
+    input_messages: Optional[list[ChatCompletionMessageParam]] = None
+    output_messages: Optional[list[ChatCompletionMessageParam]] = None
+    # --8<-- [end:responses-extra-params]
+
+    # NOTE: OpenAI harmony doesn't serialize TextContent properly;
+    # TODO: this fixes TextContent, but needs verification for tools etc.
+    # https://github.com/openai/harmony/issues/78
+    @field_serializer("output_messages", when_used="json")
+    def serialize_output_messages(self, msgs, _info):
+        if msgs:
+            serialized = []
+            for m in msgs:
+                if isinstance(m, dict):
+                    serialized.append(m)
+                elif hasattr(m, "__dict__"):
+                    serialized.append(m.to_dict())
+                else:
+                    # fall back to a pydantic dump
+                    serialized.append(m.model_dump_json())
+            return serialized
+        return None
+
+    # NOTE: OpenAI harmony doesn't serialize TextContent properly; this fixes it
+    # https://github.com/openai/harmony/issues/78
+    @field_serializer("input_messages", when_used="json")
+    def serialize_input_messages(self, msgs, _info):
+        if msgs:
+            serialized = []
+            for m in msgs:
+                if isinstance(m, dict):
+                    serialized.append(m)
+                elif hasattr(m, "__dict__"):
+                    serialized.append(m.to_dict())
+                else:
+                    # fall back to a pydantic dump
+                    serialized.append(m.model_dump_json())
+            return serialized
+        return None
+
     @classmethod
     def from_request(
         cls,
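
The two serializers above follow a standard pydantic v2 pattern: when_used="json" means the hook fires only for JSON output, so Python-mode model_dump() calls are untouched. A self-contained sketch of the pattern, with a hypothetical Payload class standing in for a harmony message:

from typing import Any, Optional

from pydantic import BaseModel, field_serializer


class Payload:
    # stand-in for a harmony Message: not a pydantic model, but has to_dict()
    def __init__(self, text: str) -> None:
        self.text = text

    def to_dict(self) -> dict:
        return {"text": self.text}


class Envelope(BaseModel):
    items: Optional[list[Any]] = None

    @field_serializer("items", when_used="json")
    def serialize_items(self, items, _info):
        # mirror the dict / to_dict() handling used in the commit
        if items:
            return [i if isinstance(i, dict) else i.to_dict() for i in items]
        return None


print(Envelope(items=[Payload("hi")]).model_dump_json())
# {"items":[{"text":"hi"}]}
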
@@ -1876,6 +1876,6 @@ class OpenAIServingResponses(OpenAIServing):
                 ResponseCompletedEvent(
                     type="response.completed",
                     sequence_number=-1,
-                    response=final_response.model_dump(),
+                    response=final_response,
                 )
             )
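
That one-line change is what makes the new serializers actually run: final_response.model_dump() produces a plain dict in Python mode, where the when_used="json" hooks never fire, so the event would carry an already-dumped dict with the custom handling lost. Passing the model itself defers serialization until the event is rendered to JSON. A sketch of the difference, using hypothetical Inner/Wrapper models:

from typing import Union

from pydantic import BaseModel, field_serializer


class Inner(BaseModel):
    secret: str = "raw"

    @field_serializer("secret", when_used="json")
    def redact(self, v, _info):
        return f"<{v}>"  # custom logic that only runs in JSON mode


class Wrapper(BaseModel):
    response: Union[Inner, dict]


inner = Inner()

# pre-dumping in Python mode skips the when_used="json" hook,
# so the custom serialization is silently lost downstream:
print(Wrapper(response=inner.model_dump()).model_dump_json())
# {"response":{"secret":"raw"}}

# nesting the model itself runs the hook when the wrapper renders to JSON:
print(Wrapper(response=inner).model_dump_json())
# {"response":{"secret":"<raw>"}}
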