mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 05:15:00 +08:00
fixed reasoning streaming with tool_choice="required" (#24108)
Signed-off-by: CNE Pierre FICHEPOIL <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Signed-off-by: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Co-authored-by: CNE Pierre FICHEPOIL <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Co-authored-by: Chauncey <chaunceyjiang@gmail.com>
This commit is contained in:
parent
8f18feb191
commit
a4c29e6e82
@ -194,11 +194,19 @@ async def test_function_tool_use(
|
|||||||
)
|
)
|
||||||
|
|
||||||
output = []
|
output = []
|
||||||
|
reasoning = []
|
||||||
async for chunk in output_stream:
|
async for chunk in output_stream:
|
||||||
if chunk.choices and chunk.choices[0].delta.tool_calls:
|
if chunk.choices:
|
||||||
|
if enable_thinking and getattr(
|
||||||
|
chunk.choices[0].delta, "reasoning_content", None
|
||||||
|
):
|
||||||
|
reasoning.append(chunk.choices[0].delta.reasoning_content)
|
||||||
|
if chunk.choices[0].delta.tool_calls:
|
||||||
output.extend(chunk.choices[0].delta.tool_calls)
|
output.extend(chunk.choices[0].delta.tool_calls)
|
||||||
|
|
||||||
assert len(output) > 0
|
assert len(output) > 0
|
||||||
|
if enable_thinking:
|
||||||
|
assert len(reasoning) > 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
|
|||||||
@ -563,8 +563,6 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
# For reasoning parser and tool call all enabled
|
# For reasoning parser and tool call all enabled
|
||||||
added_content_delta_arr = [False] * num_choices
|
added_content_delta_arr = [False] * num_choices
|
||||||
reasoning_end_arr = [False] * num_choices
|
reasoning_end_arr = [False] * num_choices
|
||||||
elif request.tool_choice == "required":
|
|
||||||
all_previous_token_ids = None
|
|
||||||
else:
|
else:
|
||||||
all_previous_token_ids = None
|
all_previous_token_ids = None
|
||||||
|
|
||||||
@ -880,13 +878,40 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
previous_text = previous_texts[i]
|
previous_text = previous_texts[i]
|
||||||
current_text = previous_text + delta_text
|
current_text = previous_text + delta_text
|
||||||
fn_name_returned = function_name_returned[i]
|
fn_name_returned = function_name_returned[i]
|
||||||
|
output_token_ids = as_list(output.token_ids)
|
||||||
|
|
||||||
if self.reasoning_parser:
|
if (
|
||||||
_, content = reasoning_parser.extract_reasoning_content(
|
self.reasoning_parser is not None
|
||||||
current_text, request
|
and not reasoning_end_arr[i]
|
||||||
|
and res.prompt_token_ids
|
||||||
|
and reasoning_parser.is_reasoning_end(res.prompt_token_ids)
|
||||||
|
):
|
||||||
|
reasoning_end_arr[i] = True
|
||||||
|
|
||||||
|
if self.reasoning_parser and not reasoning_end_arr[i]:
|
||||||
|
delta_message = (
|
||||||
|
reasoning_parser.extract_reasoning_content_streaming(
|
||||||
|
previous_text,
|
||||||
|
current_text,
|
||||||
|
delta_text,
|
||||||
|
previous_token_ids,
|
||||||
|
current_token_ids,
|
||||||
|
output_token_ids,
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
if reasoning_parser.is_reasoning_end(output_token_ids):
|
||||||
|
reasoning_end_arr[i] = True
|
||||||
|
if delta_message and delta_message.content:
|
||||||
|
current_text = delta_message.content
|
||||||
|
delta_message.content = None
|
||||||
else:
|
else:
|
||||||
|
# reasoning ended
|
||||||
|
current_text = ""
|
||||||
|
|
||||||
|
else:
|
||||||
|
# either finished reasoning or no reasoning at all
|
||||||
content = current_text
|
content = current_text
|
||||||
|
|
||||||
delta_message, function_name_returned[i] = (
|
delta_message, function_name_returned[i] = (
|
||||||
self.extract_tool_call_required_streaming(
|
self.extract_tool_call_required_streaming(
|
||||||
previous_text=previous_text,
|
previous_text=previous_text,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user