diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py
index 6f2a50020699c..8ef0d7f277d5f 100644
--- a/tests/entrypoints/openai/test_response_api_with_harmony.py
+++ b/tests/entrypoints/openai/test_response_api_with_harmony.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import importlib
+import importlib.util
 import json
 import time
 
@@ -986,3 +987,23 @@ async def test_function_call_with_previous_input_messages(
     assert (
         "aquarius" in output_text or "otter" in output_text or "tuesday" in output_text
     )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_chat_truncation_content_not_null(client: OpenAI, model_name: str):
+    response = await client.chat.completions.create(
+        model=model_name,
+        messages=[{"role": "user", "content": "What is the role of AI in medicine?"}],
+        temperature=0.0,
+        max_tokens=250,
+    )
+
+    choice = response.choices[0]
+    assert choice.finish_reason == "length", (
+        f"Expected finish_reason='length', got {choice.finish_reason}"
+    )
+    assert choice.message.content is not None, (
+        "Content should not be None when truncated"
+    )
+    assert len(choice.message.content) > 0, "Content should not be empty"
diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index 6fb074b8a19ba..d845913b8ee03 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -955,7 +955,6 @@ class TestServingChatWithHarmony:
             input_messages,
             [
                 {"role": "system"},
-                {"role": "developer"},
                 {"role": "user", "content": messages[0]["content"]},
             ],
         )
@@ -983,7 +982,6 @@ class TestServingChatWithHarmony:
             input_messages_2,
             [
                 {"role": "system"},
-                {"role": "developer"},
                 {"role": "user"},
                 # The analysis message should be dropped on subsequent inputs because
                 # of the subsequent assistant message to the final channel.
@@ -1043,7 +1041,7 @@ class TestServingChatWithHarmony:
         )
 
         # Test the Harmony messages for the second turn's input
-        req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
+        req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
         input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
         verify_harmony_messages(
             input_messages_2,
@@ -1124,7 +1122,7 @@ class TestServingChatWithHarmony:
         )
 
         # Test the Harmony messages for the second turn's input
-        req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
+        req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
         input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
         verify_harmony_messages(
             input_messages_2,
@@ -1205,7 +1203,7 @@ class TestServingChatWithHarmony:
         )
 
         # Test the Harmony messages for the second turn's input
-        req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
+        req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
         input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
         verify_harmony_messages(
             input_messages_2,
@@ -1255,7 +1253,7 @@ class TestServingChatWithHarmony:
         )
 
         # Test the Harmony messages for the third turn's input
-        req_3 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
+        req_3 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
         input_messages_3, _ = serving_chat._make_request_with_harmony(req_3)
         verify_harmony_messages(
             input_messages_3,
@@ -1318,7 +1316,7 @@ class TestServingChatWithHarmony:
         )
 
         # Test the Harmony messages for the fourth turn's input
-        req_4 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
+        req_4 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
        input_messages_4, _ = serving_chat._make_request_with_harmony(req_4)
         verify_harmony_messages(
             input_messages_4,
@@ -1374,7 +1372,6 @@ class TestServingChatWithHarmony:
             input_messages,
             [
                 {"role": "system"},
-                {"role": "developer"},
                 {"role": "user", "content": messages[0]["content"]},
                 # The reasoning that would have resulted in an analysis message is
                 # dropped because of a later assistant message to the final channel.
@@ -1406,7 +1403,6 @@ class TestServingChatWithHarmony:
             input_messages,
             [
                 {"role": "system"},
-                {"role": "developer"},
                 {"role": "user", "content": messages[0]["content"]},
                 {
                     "role": "assistant",
@@ -1436,7 +1432,6 @@ class TestServingChatWithHarmony:
             input_messages,
             [
                 {"role": "system"},
-                {"role": "developer"},
                 {"role": "user", "content": messages[0]["content"]},
                 {
                     "role": "assistant",
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index d437d1e5c3b06..88d87a3334955 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -1828,10 +1828,11 @@ class OpenAIServingChat(OpenAIServing):
         messages.append(sys_msg)
 
         # Add developer message.
-        dev_msg = get_developer_message(
-            tools=request.tools if should_include_tools else None
-        )
-        messages.append(dev_msg)
+        if request.tools:
+            dev_msg = get_developer_message(
+                tools=request.tools if should_include_tools else None
+            )
+            messages.append(dev_msg)
 
         # Add user message.
         messages.extend(parse_chat_inputs_to_harmony_messages(request.messages))
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index db92ea8982d70..da1a9c773f78f 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -79,6 +79,15 @@ class OpenAIToolParser(ToolParser):
             elif msg.channel == "commentary" and not msg.recipient:
                 commentary_content = msg_text
 
+        # Extract partial content from the parser state if the generation was truncated
+        if parser.current_content:
+            if parser.current_channel == "final":
+                final_content = parser.current_content
+            elif (
+                parser.current_channel == "commentary" and not parser.current_recipient
+            ):
+                commentary_content = parser.current_content
+
         return ExtractedToolCallInformation(
             tools_called=len(tool_calls) > 0,
             tool_calls=tool_calls,
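
Illustration (not part of the patch): the openai_tool_parser.py change salvages text that the Harmony streaming parser had accumulated for a message that was never closed because generation hit max_tokens; without it, a truncated final-channel answer came back as content=None. The sketch below models that salvage step in isolation. FakeParser and salvage_truncated_content are hypothetical stand-ins, not the real openai_harmony API; only the current_channel / current_recipient / current_content field names and the branching logic are taken from the diff above.

from dataclasses import dataclass


@dataclass
class FakeParser:
    """Hypothetical stand-in for the streaming parser's end-of-generation state.

    When generation stops early (finish_reason == "length"), the in-flight
    message is never closed, so its text survives only in these fields.
    """

    current_channel: str | None = None
    current_recipient: str | None = None
    current_content: str = ""


def salvage_truncated_content(
    parser: FakeParser,
    final_content: str | None,
    commentary_content: str | None,
) -> tuple[str | None, str | None]:
    # Mirrors the new block in OpenAIToolParser: fall back to whatever the
    # parser was still accumulating when the token budget ran out.
    if parser.current_content:
        if parser.current_channel == "final":
            final_content = parser.current_content
        elif parser.current_channel == "commentary" and not parser.current_recipient:
            commentary_content = parser.current_content
    return final_content, commentary_content


# A truncated generation: no closed messages, but text on the final channel
# is recovered instead of being dropped.
truncated = FakeParser(current_channel="final", current_content="AI assists clinicians by...")
final, commentary = salvage_truncated_content(truncated, None, None)
assert final == "AI assists clinicians by..."

# A tool call in flight (commentary with a recipient) is deliberately not
# treated as chat content; it stays with the tool-call machinery.
tool_call = FakeParser(
    current_channel="commentary",
    current_recipient="functions.get_weather",
    current_content='{"city": "Paris"',
)
final, commentary = salvage_truncated_content(tool_call, None, None)
assert final is None and commentary is None

The new test_chat_truncation_content_not_null test exercises exactly this path end to end: with max_tokens=250 the model is cut off mid-answer, finish_reason is "length", and the assertion that message.content is non-empty only holds once the parser-state fallback is in place.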