mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-27 09:55:14 +08:00
[Bugfix] Fix missing first token in tool calls during reasoning-to-tool transition (#30671)
Signed-off-by: mondaylord <20212010046@fudan.edu.cn>
This commit is contained in:
parent
855b101d75
commit
17fec3af09
@ -964,21 +964,9 @@ class OpenAIServingChat(OpenAIServing):
|
||||
assert reasoning_end_arr is not None
|
||||
output_token_ids = as_list(output.token_ids)
|
||||
if not reasoning_end_arr[i]:
|
||||
delta_message = (
|
||||
reasoning_parser.extract_reasoning_streaming(
|
||||
previous_text,
|
||||
current_text,
|
||||
delta_text,
|
||||
previous_token_ids,
|
||||
current_token_ids,
|
||||
output_token_ids,
|
||||
)
|
||||
)
|
||||
# When encountering think end id in prompt_token_ids
|
||||
# i.e., {"enable_thinking": False},
|
||||
# set reasoning status to end.
|
||||
# Remove the text and token ids related
|
||||
# to 'reasoning'.
|
||||
if (
|
||||
res.prompt_token_ids
|
||||
and reasoning_parser.is_reasoning_end(
|
||||
@ -987,30 +975,38 @@ class OpenAIServingChat(OpenAIServing):
|
||||
):
|
||||
reasoning_end_arr[i] = True
|
||||
current_token_ids = output_token_ids
|
||||
if delta_message and delta_message.content:
|
||||
current_text = delta_message.content
|
||||
delta_message.content = None
|
||||
else:
|
||||
current_text = ""
|
||||
# When encountering think end id in delta_token_ids,
|
||||
# set reasoning status to end.
|
||||
# Remove the text and token ids related
|
||||
# to 'reasoning'.
|
||||
if reasoning_parser.is_reasoning_end(output_token_ids):
|
||||
reasoning_end_arr[i] = True
|
||||
current_token_ids = (
|
||||
reasoning_parser.extract_content_ids(
|
||||
output_token_ids
|
||||
# Don't update current_text; keep it as is from the delta
|
||||
else:
|
||||
delta_message = (
|
||||
reasoning_parser.extract_reasoning_streaming(
|
||||
previous_text,
|
||||
current_text,
|
||||
delta_text,
|
||||
previous_token_ids,
|
||||
current_token_ids,
|
||||
output_token_ids,
|
||||
)
|
||||
)
|
||||
if delta_message and delta_message.content:
|
||||
current_text = delta_message.content
|
||||
delta_message.content = None
|
||||
else:
|
||||
current_text = ""
|
||||
|
||||
# When encountering think end id in delta_token_ids,
|
||||
# set reasoning status to end.
|
||||
# Remove the text and token ids related
|
||||
# to 'reasoning'.
|
||||
if reasoning_parser.is_reasoning_end(output_token_ids):
|
||||
reasoning_end_arr[i] = True
|
||||
current_token_ids = (
|
||||
reasoning_parser.extract_content_ids(
|
||||
output_token_ids
|
||||
)
|
||||
)
|
||||
if delta_message and delta_message.content:
|
||||
current_text = delta_message.content
|
||||
delta_message.content = None
|
||||
else:
|
||||
current_text = ""
|
||||
|
||||
# handle tool calls only after reasoning is done,
|
||||
else:
|
||||
if reasoning_end_arr[i]:
|
||||
delta_token_ids = output_token_ids
|
||||
# On the first transition to tool calling,
|
||||
# add the remaining text and token ids
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user