[Bugfix] Fix missing first token in tool calls during reasoning-to-tool transition (#30671)

Signed-off-by: mondaylord <20212010046@fudan.edu.cn>
This commit is contained in:
mondaylord 2025-12-16 00:13:37 +08:00 committed by GitHub
parent 855b101d75
commit 17fec3af09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -964,21 +964,9 @@ class OpenAIServingChat(OpenAIServing):
assert reasoning_end_arr is not None
output_token_ids = as_list(output.token_ids)
if not reasoning_end_arr[i]:
delta_message = (
reasoning_parser.extract_reasoning_streaming(
previous_text,
current_text,
delta_text,
previous_token_ids,
current_token_ids,
output_token_ids,
)
)
# When encountering think end id in prompt_token_ids
# i.e {"enable_thinking": False},
# set reasoning status to end.
# Remove the text and token ids related
# to 'reasoning'.
if (
res.prompt_token_ids
and reasoning_parser.is_reasoning_end(
@@ -987,30 +975,38 @@ class OpenAIServingChat(OpenAIServing):
):
reasoning_end_arr[i] = True
current_token_ids = output_token_ids
if delta_message and delta_message.content:
current_text = delta_message.content
delta_message.content = None
else:
current_text = ""
# When encountering think end id in delta_token_ids,
# set reasoning status to end.
# Remove the text and token ids related
# to 'reasoning'.
if reasoning_parser.is_reasoning_end(output_token_ids):
reasoning_end_arr[i] = True
current_token_ids = (
reasoning_parser.extract_content_ids(
output_token_ids
# Don't update current_text, keep it as is from delta
else:
delta_message = (
reasoning_parser.extract_reasoning_streaming(
previous_text,
current_text,
delta_text,
previous_token_ids,
current_token_ids,
output_token_ids,
)
)
if delta_message and delta_message.content:
current_text = delta_message.content
delta_message.content = None
else:
current_text = ""
# When encountering think end id in delta_token_ids,
# set reasoning status to end.
# Remove the text and token ids related
# to 'reasoning'.
if reasoning_parser.is_reasoning_end(output_token_ids):
reasoning_end_arr[i] = True
current_token_ids = (
reasoning_parser.extract_content_ids(
output_token_ids
)
)
if delta_message and delta_message.content:
current_text = delta_message.content
delta_message.content = None
else:
current_text = ""
# handle tool calls only after reasoning is done,
else:
if reasoning_end_arr[i]:
delta_token_ids = output_token_ids
# First time to tool call,
# add the remaining text and token ids