[Bugfix]: Fix the incompatibility with streaming when thinking is disabled (#19135)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
commit 8fc57501d3
parent af7fc84fd2
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from typing import NamedTuple
+
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
@@ -22,7 +24,9 @@ def server():  # noqa: F811
         "--guided-decoding-backend",
         "xgrammar",
         "--tool-call-parser",
-        "hermes"
+        "hermes",
+        "--reasoning-parser",
+        "qwen3",
     ]
 
     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
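The two new flags wire the Hermes tool-call parser and the Qwen3 reasoning parser into the test server. A minimal sketch of the equivalent standalone setup, assuming vLLM's tests.utils.RemoteOpenAIServer helper and a Qwen3-family MODEL_NAME (the diff elides its value, and this hunk shows only a slice of the argument list; the fixture presumably also passes --enable-auto-tool-choice, which is not visible here):

# Sketch only: MODEL_NAME and the surrounding args are assumptions, since the
# hunk shows just part of the fixture's argument list.
from tests.utils import RemoteOpenAIServer  # vLLM's test helper

MODEL_NAME = "Qwen/Qwen3-0.6B"  # placeholder Qwen3-family checkpoint

args = [
    "--guided-decoding-backend", "xgrammar",
    "--tool-call-parser", "hermes",  # parse Hermes-format tool calls
    "--reasoning-parser", "qwen3",   # split <think>...</think> into reasoning_content
]

with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
    async_client = remote_server.get_async_client()
    # ... issue chat.completions requests against the local server ...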
@@ -35,9 +39,53 @@ async def client(server):
     yield async_client
 
 
+class TestCase(NamedTuple):
+    model_name: str
+    stream: bool
+    tool_choice: str
+    enable_thinking: bool
+
+
 @pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_required_tool_use(client: openai.AsyncOpenAI, model_name: str):
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        TestCase(model_name=MODEL_NAME,
+                 stream=True,
+                 tool_choice="auto",
+                 enable_thinking=False),
+        TestCase(model_name=MODEL_NAME,
+                 stream=False,
+                 tool_choice="auto",
+                 enable_thinking=False),
+        TestCase(model_name=MODEL_NAME,
+                 stream=True,
+                 tool_choice="required",
+                 enable_thinking=False),
+        TestCase(model_name=MODEL_NAME,
+                 stream=False,
+                 tool_choice="required",
+                 enable_thinking=False),
+        TestCase(model_name=MODEL_NAME,
+                 stream=True,
+                 tool_choice="auto",
+                 enable_thinking=True),
+        TestCase(model_name=MODEL_NAME,
+                 stream=False,
+                 tool_choice="auto",
+                 enable_thinking=True),
+        TestCase(model_name=MODEL_NAME,
+                 stream=True,
+                 tool_choice="required",
+                 enable_thinking=True),
+        TestCase(model_name=MODEL_NAME,
+                 stream=False,
+                 tool_choice="required",
+                 enable_thinking=True),
+    ],
+)
+async def test_function_tool_use(client: openai.AsyncOpenAI,
+                                 test_case: TestCase):
     tools = [
         {
             "type": "function",
@@ -126,26 +174,34 @@ async def test_required_tool_use(client: openai.AsyncOpenAI, model_name: str):
             "forecast for the next 5 days, in fahrenheit?",
         },
     ]
 
     if not test_case.stream:
         # Non-streaming test
         chat_completion = await client.chat.completions.create(
             messages=messages,
-            model=model_name,
+            model=test_case.model_name,
             tools=tools,
-            tool_choice="required",
-        )
+            tool_choice=test_case.tool_choice,
+            extra_body={
+                "chat_template_kwargs": {
+                    "enable_thinking": test_case.enable_thinking
+                }
+            })
 
         assert chat_completion.choices[0].message.tool_calls is not None
         assert len(chat_completion.choices[0].message.tool_calls) > 0
 
     else:
         # Streaming test
         stream = await client.chat.completions.create(
             messages=messages,
-            model=model_name,
+            model=test_case.model_name,
             tools=tools,
-            tool_choice="required",
+            tool_choice=test_case.tool_choice,
             stream=True,
-        )
+            extra_body={
+                "chat_template_kwargs": {
+                    "enable_thinking": test_case.enable_thinking
+                }
+            })
 
         output = []
         async for chunk in stream:
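Outside the test harness, the same knob can be exercised with the plain OpenAI client against a running vLLM server. A minimal sketch, assuming a local server started with the flags above; the base URL, API key, model name, and the get_current_weather tool schema are placeholders, not values from this diff:

import asyncio

import openai  # official client, as in the test file

# Hypothetical tool schema, mirroring the weather tool the test alludes to.
tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
        },
    },
}]


async def main() -> None:
    # vLLM's OpenAI-compatible server accepts any key unless one is configured.
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="EMPTY")
    stream = await client.chat.completions.create(
        model="Qwen/Qwen3-0.6B",  # placeholder model name
        messages=[{
            "role": "user",
            "content": "What is the weather forecast for the next 5 days, "
            "in fahrenheit?",
        }],
        tools=tools,
        tool_choice="auto",
        stream=True,
        # Forwarded to the chat template; for Qwen3-style templates this
        # disables the <think> phase, which is exactly the case the fix covers.
        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
    )
    async for chunk in stream:
        if not chunk.choices:  # e.g. a final usage-only chunk
            continue
        delta = chunk.choices[0].delta
        if delta.tool_calls:  # before this fix, these deltas could be lost
            print(delta.tool_calls)


asyncio.run(main())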
@@ -689,7 +689,21 @@ class OpenAIServingChat(OpenAIServing):
                                 current_token_ids,
                                 output.token_ids,
                             ))
 
+                        # When encountering think end id in prompt_token_ids,
+                        # i.e. {"enable_thinking": False},
+                        # set reasoning status to end.
+                        # Remove the text and token ids related
+                        # to 'reasoning_content'.
+                        if res.prompt_token_ids and \
+                                reasoning_parser.is_reasoning_end(
+                                    list(res.prompt_token_ids)):
+                            reasoning_end_arr[i] = True
+                            current_token_ids = list(output.token_ids)
+                            if delta_message and delta_message.content:
+                                current_text = delta_message.content
+                                delta_message.content = None
+                            else:
+                                current_text = ""
                         # When encountering think end id in delta_token_ids,
                         # set reasoning status to end.
                         # Remove the text and token ids related
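This new branch is the core of the fix. With {"enable_thinking": false}, Qwen3's chat template closes the think block inside the prompt itself, so the think-end token never arrives in a generated delta and the old streaming path never marked reasoning as finished. A minimal standalone sketch of the prompt-side check, under stated assumptions: the helper below is hypothetical (in vLLM the check lives in each ReasoningParser subclass's is_reasoning_end), and the token id is an assumption, not taken from this diff:

from collections.abc import Sequence

# Assumption: Qwen3's "</think>" token id; look it up via the tokenizer.
THINK_END_ID = 151668


def is_reasoning_end(token_ids: Sequence[int]) -> bool:
    # True once the end-of-thinking token has been seen. With
    # {"enable_thinking": False} the chat template already placed it in the
    # prompt, so this succeeds before a single token is generated.
    return THINK_END_ID in token_ids


# Mirrors the diff: check the prompt before falling back to generated tokens.
prompt_token_ids = [9906, THINK_END_ID, 3923]  # toy prompt with a closed think block
if prompt_token_ids and is_reasoning_end(prompt_token_ids):
    reasoning_ended = True  # stream everything as content / tool calls

Marking the state up front lets the streaming handler route subsequent deltas to content and tool_calls instead of reasoning_content, which is what restores streamed tool calling when thinking is disabled.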