mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-19 19:57:03 +08:00
[Bugfix] Fix missing first token in tool calls during reasoning-to-tool transition (#30671)
Signed-off-by: mondaylord <20212010046@fudan.edu.cn>
This commit is contained in:
parent
855b101d75
commit
17fec3af09
@ -964,21 +964,9 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
assert reasoning_end_arr is not None
|
assert reasoning_end_arr is not None
|
||||||
output_token_ids = as_list(output.token_ids)
|
output_token_ids = as_list(output.token_ids)
|
||||||
if not reasoning_end_arr[i]:
|
if not reasoning_end_arr[i]:
|
||||||
delta_message = (
|
|
||||||
reasoning_parser.extract_reasoning_streaming(
|
|
||||||
previous_text,
|
|
||||||
current_text,
|
|
||||||
delta_text,
|
|
||||||
previous_token_ids,
|
|
||||||
current_token_ids,
|
|
||||||
output_token_ids,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
# When encountering think end id in prompt_token_ids
|
# When encountering think end id in prompt_token_ids
|
||||||
# i.e {"enable_thinking": False},
|
# i.e {"enable_thinking": False},
|
||||||
# set reasoning status to end.
|
# set reasoning status to end.
|
||||||
# Remove the text and token ids related
|
|
||||||
# to 'reasoning'.
|
|
||||||
if (
|
if (
|
||||||
res.prompt_token_ids
|
res.prompt_token_ids
|
||||||
and reasoning_parser.is_reasoning_end(
|
and reasoning_parser.is_reasoning_end(
|
||||||
@ -987,30 +975,38 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
):
|
):
|
||||||
reasoning_end_arr[i] = True
|
reasoning_end_arr[i] = True
|
||||||
current_token_ids = output_token_ids
|
current_token_ids = output_token_ids
|
||||||
if delta_message and delta_message.content:
|
# Don't update current_text, keep it as is from delta
|
||||||
current_text = delta_message.content
|
else:
|
||||||
delta_message.content = None
|
delta_message = (
|
||||||
else:
|
reasoning_parser.extract_reasoning_streaming(
|
||||||
current_text = ""
|
previous_text,
|
||||||
# When encountering think end id in delta_token_ids,
|
current_text,
|
||||||
# set reasoning status to end.
|
delta_text,
|
||||||
# Remove the text and token ids related
|
previous_token_ids,
|
||||||
# to 'reasoning'.
|
current_token_ids,
|
||||||
if reasoning_parser.is_reasoning_end(output_token_ids):
|
output_token_ids,
|
||||||
reasoning_end_arr[i] = True
|
|
||||||
current_token_ids = (
|
|
||||||
reasoning_parser.extract_content_ids(
|
|
||||||
output_token_ids
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if delta_message and delta_message.content:
|
|
||||||
current_text = delta_message.content
|
# When encountering think end id in delta_token_ids,
|
||||||
delta_message.content = None
|
# set reasoning status to end.
|
||||||
else:
|
# Remove the text and token ids related
|
||||||
current_text = ""
|
# to 'reasoning'.
|
||||||
|
if reasoning_parser.is_reasoning_end(output_token_ids):
|
||||||
|
reasoning_end_arr[i] = True
|
||||||
|
current_token_ids = (
|
||||||
|
reasoning_parser.extract_content_ids(
|
||||||
|
output_token_ids
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if delta_message and delta_message.content:
|
||||||
|
current_text = delta_message.content
|
||||||
|
delta_message.content = None
|
||||||
|
else:
|
||||||
|
current_text = ""
|
||||||
|
|
||||||
# handle tool calls only after reasoning is done,
|
# handle tool calls only after reasoning is done,
|
||||||
else:
|
if reasoning_end_arr[i]:
|
||||||
delta_token_ids = output_token_ids
|
delta_token_ids = output_token_ids
|
||||||
# First time to tool call,
|
# First time to tool call,
|
||||||
# add the remaining text and token ids
|
# add the remaining text and token ids
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user