[responsesAPI] parse reasoning item input (#28248)
Signed-off-by: Andrew Xia <axia@fb.com>
Co-authored-by: Andrew Xia <axia@fb.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>

parent e9056056fb
commit 742e9ff6b3
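In short, this change teaches the Responses API frontend to accept `reasoning` items in the request `input` and to convert them back into assistant chat messages before the prompt is built. A minimal request sketch, assuming a locally served reasoning model as in the example file below; the `id` value and texts are placeholders, not values produced by the change itself:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="empty")

# A reasoning item produced by a previous response can now be passed back in
# `input`; the server parses it instead of rejecting the request.
response = client.responses.create(
    model="Qwen/Qwen3-8B",
    input=[
        {"type": "message", "role": "user", "content": "Hello."},
        {
            "type": "reasoning",
            "id": "rs_example",  # placeholder id for illustration
            "summary": [],
            "content": [
                {"type": "reasoning_text", "text": "We need to respond: greeting."}
            ],
        },
    ],
)
print(response.output_text)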
examples/online_serving/openai_responses_client.py (new file, 44 lines)
@@ -0,0 +1,44 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Set up this example by starting a vLLM OpenAI-compatible server.
Reasoning models can be used through the Responses API as seen here
https://platform.openai.com/docs/api-reference/responses
For example:
vllm serve Qwen/Qwen3-8B --reasoning-parser qwen3

"""

from openai import OpenAI

input_messages = [{"role": "user", "content": "What model are you?"}]


def main():
    base_url = "http://localhost:8000/v1"
    client = OpenAI(base_url=base_url, api_key="empty")
    model = "Qwen/Qwen3-8B"  # get_first_model(client)
    response = client.responses.create(
        model=model,
        input=input_messages,
    )

    for message in response.output:
        if message.type == "reasoning":
            # append reasoning message
            input_messages.append(message)

    response_2 = client.responses.create(
        model=model,
        input=input_messages,
    )
    print(response_2.output_text)
    # I am Qwen, a large language model developed by Alibaba Cloud.
    # I am designed to assist with a wide range of tasks, including
    # answering questions, creating content, coding, and engaging in
    # conversations. I can help with various topics and provide
    # information or support in multiple languages. How can I assist you today?


if __name__ == "__main__":
    main()
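For reference, the reasoning item the loop above appends back into `input_messages` is an openai `ResponseReasoningItem`. A hand-built equivalent, as a rough sketch only (the `id` and text are placeholders; in the real flow the item comes straight from `response.output`):

from openai.types.responses.response_reasoning_item import Content, ResponseReasoningItem

reasoning_item = ResponseReasoningItem(
    id="rs_example",  # real ids are assigned by the server; this one is made up
    type="reasoning",
    summary=[],
    content=[
        Content(
            type="reasoning_text",
            text="The user asks which model I am; answer directly.",
        )
    ],
    encrypted_content=None,
    status=None,
)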
tests/entrypoints/openai/test_response_api_simple.py (new file, 71 lines)
@@ -0,0 +1,71 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project


import pytest
import pytest_asyncio
from openai import OpenAI

from ...utils import RemoteOpenAIServer

MODEL_NAME = "Qwen/Qwen3-8B"


@pytest.fixture(scope="module")
def server():
    args = ["--reasoning-parser", "qwen3", "--max_model_len", "5000"]
    env_dict = dict(
        VLLM_ENABLE_RESPONSES_API_STORE="1",
        # uncomment for tool calling
        # PYTHON_EXECUTION_BACKEND="dangerously_use_uv",
    )

    with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_dict) as remote_server:
        yield remote_server


@pytest_asyncio.fixture
async def client(server):
    async with server.get_async_client() as async_client:
        yield async_client


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_basic(client: OpenAI, model_name: str):
    response = await client.responses.create(
        model=model_name,
        input="What is 13 * 24?",
    )
    assert response is not None
    print("response: ", response)
    assert response.status == "completed"


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_reasoning_item(client: OpenAI, model_name: str):
    response = await client.responses.create(
        model=model_name,
        input=[
            {"type": "message", "content": "Hello.", "role": "user"},
            {
                "type": "reasoning",
                "id": "lol",
                "content": [
                    {
                        "type": "reasoning_text",
                        "text": "We need to respond: greeting.",
                    }
                ],
                "summary": [],
            },
        ],
        temperature=0.0,
    )
    assert response is not None
    assert response.status == "completed"
    # make sure we get a reasoning and text output
    assert response.output[0].type == "reasoning"
    assert response.output[1].type == "message"
    assert type(response.output[1].content[0].text) is str
@@ -35,7 +35,7 @@ GET_WEATHER_SCHEMA = {
 
 @pytest.fixture(scope="module")
 def server():
-    args = ["--enforce-eager", "--tool-server", "demo"]
+    args = ["--enforce-eager", "--tool-server", "demo", "--max_model_len", "5000"]
     env_dict = dict(
         VLLM_ENABLE_RESPONSES_API_STORE="1",
         PYTHON_EXECUTION_BACKEND="dangerously_use_uv",
@@ -550,6 +550,31 @@ def call_function(name, args):
     raise ValueError(f"Unknown function: {name}")
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_reasoning_item(client: OpenAI, model_name: str):
+    response = await client.responses.create(
+        model=model_name,
+        input=[
+            {"type": "message", "content": "Hello.", "role": "user"},
+            {
+                "type": "reasoning",
+                "id": "lol",
+                "content": [
+                    {
+                        "type": "reasoning_text",
+                        "text": "We need to respond: greeting.",
+                    }
+                ],
+                "summary": [],
+            },
+        ],
+        temperature=0.0,
+    )
+    assert response is not None
+    assert response.status == "completed"
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling(client: OpenAI, model_name: str):
@@ -1,7 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
+from openai.types.responses.response_reasoning_item import (
+    Content,
+    ResponseReasoningItem,
+    Summary,
+)
+
 from vllm.entrypoints.responses_utils import (
+    construct_chat_message_with_tool_call,
     convert_tool_responses_to_completions_format,
 )
 
@@ -28,3 +36,53 @@ class TestResponsesUtils:
         result = convert_tool_responses_to_completions_format(input_tool)
 
         assert result == {"type": "function", "function": input_tool}
+
+    def test_construct_chat_message_with_tool_call(self):
+        item = ResponseReasoningItem(
+            id="lol",
+            summary=[],
+            type="reasoning",
+            content=[
+                Content(
+                    text="Leroy Jenkins",
+                    type="reasoning_text",
+                )
+            ],
+            encrypted_content=None,
+            status=None,
+        )
+        formatted_item = construct_chat_message_with_tool_call(item)
+        assert formatted_item["role"] == "assistant"
+        assert formatted_item["reasoning"] == "Leroy Jenkins"
+
+        item = ResponseReasoningItem(
+            id="lol",
+            summary=[
+                Summary(
+                    text='Hmm, the user has just started with a simple "Hello,"',
+                    type="summary_text",
+                )
+            ],
+            type="reasoning",
+            content=None,
+            encrypted_content=None,
+            status=None,
+        )
+
+        formatted_item = construct_chat_message_with_tool_call(item)
+        assert formatted_item["role"] == "assistant"
+        assert (
+            formatted_item["reasoning"]
+            == 'Hmm, the user has just started with a simple "Hello,"'
+        )
+
+        item = ResponseReasoningItem(
+            id="lol",
+            summary=[],
+            type="reasoning",
+            content=None,
+            encrypted_content="TOP_SECRET_MESSAGE",
+            status=None,
+        )
+        with pytest.raises(ValueError):
+            construct_chat_message_with_tool_call(item)
@@ -10,6 +10,7 @@ from openai.types.chat.chat_completion_message_tool_call_param import (
     Function as FunctionCallTool,
 )
 from openai.types.responses import ResponseFunctionToolCall
+from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from openai.types.responses.tool import Tool
 
 from vllm import envs
@@ -37,6 +38,18 @@ def construct_chat_message_with_tool_call(
                 )
             ],
         )
+    elif isinstance(item, ResponseReasoningItem):
+        reasoning_content = ""
+        if item.encrypted_content:
+            raise ValueError("Encrypted content is not supported.")
+        if len(item.summary) == 1:
+            reasoning_content = item.summary[0].text
+        elif item.content and len(item.content) == 1:
+            reasoning_content = item.content[0].text
+        return {
+            "role": "assistant",
+            "reasoning": reasoning_content,
+        }
     elif item.get("type") == "function_call_output":
         # Append the function call output as a tool message.
         return ChatCompletionToolMessageParam(