mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-08 08:36:50 +08:00
[BugFix]fix gpt-oss v1/completions response bug (#30608)
Signed-off-by: princepride <wangzhipeng628@gmail.com> Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com> Co-authored-by: Chauncey <chaunceyjiang@gmail.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk> Co-authored-by: bbrowning <bbrownin@redhat.com>
This commit is contained in:
parent
7c73ceb581
commit
3e92b2b7ac
@ -1,6 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
import importlib
|
import importlib
|
||||||
|
import importlib.util
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@ -986,3 +987,23 @@ async def test_function_call_with_previous_input_messages(
|
|||||||
assert (
|
assert (
|
||||||
"aquarius" in output_text or "otter" in output_text or "tuesday" in output_text
|
"aquarius" in output_text or "otter" in output_text or "tuesday" in output_text
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_chat_truncation_content_not_null(client: OpenAI, model_name: str):
    """Verify a length-limited chat completion still carries message content.

    The request is sent with ``max_tokens=250`` and ``temperature=0.0``. The
    test expects the first choice to report ``finish_reason == "length"`` and
    its message content to be neither ``None`` nor empty.
    """
    prompt_messages = [
        {"role": "user", "content": "What is the role of AI in medicine?"}
    ]
    completion = await client.chat.completions.create(
        model=model_name,
        messages=prompt_messages,
        temperature=0.0,
        max_tokens=250,
    )

    first_choice = completion.choices[0]

    # The generation must have stopped because it hit the token cap.
    assert first_choice.finish_reason == "length", (
        f"Expected finish_reason='length', got {first_choice.finish_reason}"
    )

    # Even though the output was cut short, the text produced so far must be
    # returned rather than dropped.
    truncated_text = first_choice.message.content
    assert truncated_text is not None, "Content should not be None when truncated"
    assert len(truncated_text) > 0, "Content should not be empty"
|
||||||
|
|||||||
@ -955,7 +955,6 @@ class TestServingChatWithHarmony:
|
|||||||
input_messages,
|
input_messages,
|
||||||
[
|
[
|
||||||
{"role": "system"},
|
{"role": "system"},
|
||||||
{"role": "developer"},
|
|
||||||
{"role": "user", "content": messages[0]["content"]},
|
{"role": "user", "content": messages[0]["content"]},
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -983,7 +982,6 @@ class TestServingChatWithHarmony:
|
|||||||
input_messages_2,
|
input_messages_2,
|
||||||
[
|
[
|
||||||
{"role": "system"},
|
{"role": "system"},
|
||||||
{"role": "developer"},
|
|
||||||
{"role": "user"},
|
{"role": "user"},
|
||||||
# The analysis message should be dropped on subsequent inputs because
|
# The analysis message should be dropped on subsequent inputs because
|
||||||
# of the subsequent assistant message to the final channel.
|
# of the subsequent assistant message to the final channel.
|
||||||
@ -1043,7 +1041,7 @@ class TestServingChatWithHarmony:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Test the Harmony messages for the second turn's input
|
# Test the Harmony messages for the second turn's input
|
||||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||||
verify_harmony_messages(
|
verify_harmony_messages(
|
||||||
input_messages_2,
|
input_messages_2,
|
||||||
@ -1124,7 +1122,7 @@ class TestServingChatWithHarmony:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Test the Harmony messages for the second turn's input
|
# Test the Harmony messages for the second turn's input
|
||||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||||
verify_harmony_messages(
|
verify_harmony_messages(
|
||||||
input_messages_2,
|
input_messages_2,
|
||||||
@ -1205,7 +1203,7 @@ class TestServingChatWithHarmony:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Test the Harmony messages for the second turn's input
|
# Test the Harmony messages for the second turn's input
|
||||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||||
verify_harmony_messages(
|
verify_harmony_messages(
|
||||||
input_messages_2,
|
input_messages_2,
|
||||||
@ -1255,7 +1253,7 @@ class TestServingChatWithHarmony:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Test the Harmony messages for the third turn's input
|
# Test the Harmony messages for the third turn's input
|
||||||
req_3 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
req_3 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||||
input_messages_3, _ = serving_chat._make_request_with_harmony(req_3)
|
input_messages_3, _ = serving_chat._make_request_with_harmony(req_3)
|
||||||
verify_harmony_messages(
|
verify_harmony_messages(
|
||||||
input_messages_3,
|
input_messages_3,
|
||||||
@ -1318,7 +1316,7 @@ class TestServingChatWithHarmony:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Test the Harmony messages for the fourth turn's input
|
# Test the Harmony messages for the fourth turn's input
|
||||||
req_4 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
req_4 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||||
input_messages_4, _ = serving_chat._make_request_with_harmony(req_4)
|
input_messages_4, _ = serving_chat._make_request_with_harmony(req_4)
|
||||||
verify_harmony_messages(
|
verify_harmony_messages(
|
||||||
input_messages_4,
|
input_messages_4,
|
||||||
@ -1374,7 +1372,6 @@ class TestServingChatWithHarmony:
|
|||||||
input_messages,
|
input_messages,
|
||||||
[
|
[
|
||||||
{"role": "system"},
|
{"role": "system"},
|
||||||
{"role": "developer"},
|
|
||||||
{"role": "user", "content": messages[0]["content"]},
|
{"role": "user", "content": messages[0]["content"]},
|
||||||
# The reasoning that would have resulted in an analysis message is
|
# The reasoning that would have resulted in an analysis message is
|
||||||
# dropped because of a later assistant message to the final channel.
|
# dropped because of a later assistant message to the final channel.
|
||||||
@ -1406,7 +1403,6 @@ class TestServingChatWithHarmony:
|
|||||||
input_messages,
|
input_messages,
|
||||||
[
|
[
|
||||||
{"role": "system"},
|
{"role": "system"},
|
||||||
{"role": "developer"},
|
|
||||||
{"role": "user", "content": messages[0]["content"]},
|
{"role": "user", "content": messages[0]["content"]},
|
||||||
{
|
{
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
@ -1436,7 +1432,6 @@ class TestServingChatWithHarmony:
|
|||||||
input_messages,
|
input_messages,
|
||||||
[
|
[
|
||||||
{"role": "system"},
|
{"role": "system"},
|
||||||
{"role": "developer"},
|
|
||||||
{"role": "user", "content": messages[0]["content"]},
|
{"role": "user", "content": messages[0]["content"]},
|
||||||
{
|
{
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
|
|||||||
@ -1828,10 +1828,11 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
messages.append(sys_msg)
|
messages.append(sys_msg)
|
||||||
|
|
||||||
# Add developer message.
|
# Add developer message.
|
||||||
dev_msg = get_developer_message(
|
if request.tools:
|
||||||
tools=request.tools if should_include_tools else None
|
dev_msg = get_developer_message(
|
||||||
)
|
tools=request.tools if should_include_tools else None
|
||||||
messages.append(dev_msg)
|
)
|
||||||
|
messages.append(dev_msg)
|
||||||
|
|
||||||
# Add user message.
|
# Add user message.
|
||||||
messages.extend(parse_chat_inputs_to_harmony_messages(request.messages))
|
messages.extend(parse_chat_inputs_to_harmony_messages(request.messages))
|
||||||
|
|||||||
@ -79,6 +79,15 @@ class OpenAIToolParser(ToolParser):
|
|||||||
elif msg.channel == "commentary" and not msg.recipient:
|
elif msg.channel == "commentary" and not msg.recipient:
|
||||||
commentary_content = msg_text
|
commentary_content = msg_text
|
||||||
|
|
||||||
|
# Extract partial content from the parser state if the generation was truncated
|
||||||
|
if parser.current_content:
|
||||||
|
if parser.current_channel == "final":
|
||||||
|
final_content = parser.current_content
|
||||||
|
elif (
|
||||||
|
parser.current_channel == "commentary" and not parser.current_recipient
|
||||||
|
):
|
||||||
|
commentary_content = parser.current_content
|
||||||
|
|
||||||
return ExtractedToolCallInformation(
|
return ExtractedToolCallInformation(
|
||||||
tools_called=len(tool_calls) > 0,
|
tools_called=len(tool_calls) > 0,
|
||||||
tool_calls=tool_calls,
|
tool_calls=tool_calls,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user