mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 13:25:01 +08:00
[Frontend] Avoid list copies in serving_chat.py (#22947)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
fbd88728b3
commit
f6b5040590
@ -50,6 +50,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
|
|||||||
from vllm.transformers_utils.tokenizers import (maybe_serialize_tool_calls,
|
from vllm.transformers_utils.tokenizers import (maybe_serialize_tool_calls,
|
||||||
truncate_tool_call_ids,
|
truncate_tool_call_ids,
|
||||||
validate_request_params)
|
validate_request_params)
|
||||||
|
from vllm.utils import as_list
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
@ -670,10 +671,10 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
|
|
||||||
# avoid the None + list error.
|
# avoid the None + list error.
|
||||||
if previous_token_ids:
|
if previous_token_ids:
|
||||||
current_token_ids = previous_token_ids + list(
|
current_token_ids = previous_token_ids + as_list(
|
||||||
output.token_ids)
|
output.token_ids)
|
||||||
else:
|
else:
|
||||||
current_token_ids = list(output.token_ids)
|
current_token_ids = as_list(output.token_ids)
|
||||||
|
|
||||||
if self.use_harmony:
|
if self.use_harmony:
|
||||||
if is_final:
|
if is_final:
|
||||||
@ -703,11 +704,10 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
# set reasoning status to end.
|
# set reasoning status to end.
|
||||||
# Only keep 'content', remove 'reasoning_content'.
|
# Only keep 'content', remove 'reasoning_content'.
|
||||||
if reasoning_parser.is_reasoning_end(
|
if reasoning_parser.is_reasoning_end(
|
||||||
list(output.token_ids)) or \
|
as_list(output.token_ids)) or (
|
||||||
(res.prompt_token_ids and
|
res.prompt_token_ids
|
||||||
reasoning_parser.is_reasoning_end(
|
and reasoning_parser.is_reasoning_end(
|
||||||
list(res.prompt_token_ids)
|
res.prompt_token_ids)):
|
||||||
)):
|
|
||||||
reasoning_end_arr[i] = True
|
reasoning_end_arr[i] = True
|
||||||
if delta_message and delta_message.content:
|
if delta_message and delta_message.content:
|
||||||
# This need to be added to next `delta_text`
|
# This need to be added to next `delta_text`
|
||||||
@ -771,6 +771,7 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
assert reasoning_parser is not None
|
assert reasoning_parser is not None
|
||||||
assert added_content_delta_arr is not None
|
assert added_content_delta_arr is not None
|
||||||
assert reasoning_end_arr is not None
|
assert reasoning_end_arr is not None
|
||||||
|
output_token_ids = as_list(output.token_ids)
|
||||||
if not reasoning_end_arr[i]:
|
if not reasoning_end_arr[i]:
|
||||||
delta_message = (
|
delta_message = (
|
||||||
reasoning_parser.
|
reasoning_parser.
|
||||||
@ -780,7 +781,7 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
delta_text,
|
delta_text,
|
||||||
previous_token_ids,
|
previous_token_ids,
|
||||||
current_token_ids,
|
current_token_ids,
|
||||||
output.token_ids,
|
output_token_ids,
|
||||||
))
|
))
|
||||||
# When encountering think end id in prompt_token_ids
|
# When encountering think end id in prompt_token_ids
|
||||||
# i.e {"enable_thinking": False},
|
# i.e {"enable_thinking": False},
|
||||||
@ -789,9 +790,9 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
# to 'reasoning_content'.
|
# to 'reasoning_content'.
|
||||||
if res.prompt_token_ids and \
|
if res.prompt_token_ids and \
|
||||||
reasoning_parser.is_reasoning_end(
|
reasoning_parser.is_reasoning_end(
|
||||||
list(res.prompt_token_ids)):
|
res.prompt_token_ids):
|
||||||
reasoning_end_arr[i] = True
|
reasoning_end_arr[i] = True
|
||||||
current_token_ids = list(output.token_ids)
|
current_token_ids = output_token_ids
|
||||||
if delta_message and delta_message.content:
|
if delta_message and delta_message.content:
|
||||||
current_text = delta_message.content
|
current_text = delta_message.content
|
||||||
delta_message.content = None
|
delta_message.content = None
|
||||||
@ -802,11 +803,11 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
# Remove the text and token ids related
|
# Remove the text and token ids related
|
||||||
# to 'reasoning_content'.
|
# to 'reasoning_content'.
|
||||||
if reasoning_parser.is_reasoning_end(
|
if reasoning_parser.is_reasoning_end(
|
||||||
list(output.token_ids)):
|
output_token_ids):
|
||||||
reasoning_end_arr[i] = True
|
reasoning_end_arr[i] = True
|
||||||
current_token_ids = \
|
current_token_ids = \
|
||||||
reasoning_parser.extract_content_ids(
|
reasoning_parser.extract_content_ids(
|
||||||
list(output.token_ids))
|
output_token_ids)
|
||||||
if delta_message and delta_message.content:
|
if delta_message and delta_message.content:
|
||||||
current_text = delta_message.content
|
current_text = delta_message.content
|
||||||
delta_message.content = None
|
delta_message.content = None
|
||||||
@ -815,7 +816,7 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
|
|
||||||
# handle tool calls only after reasoning is done,
|
# handle tool calls only after reasoning is done,
|
||||||
else:
|
else:
|
||||||
delta_token_ids = list(output.token_ids)
|
delta_token_ids = output_token_ids
|
||||||
# First time to tool call,
|
# First time to tool call,
|
||||||
# add the remaining text and token ids
|
# add the remaining text and token ids
|
||||||
# to delta from previous
|
# to delta from previous
|
||||||
@ -899,7 +900,7 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
self.request_logger.log_outputs(
|
self.request_logger.log_outputs(
|
||||||
request_id=request_id,
|
request_id=request_id,
|
||||||
outputs=delta_content,
|
outputs=delta_content,
|
||||||
output_token_ids=list(output.token_ids),
|
output_token_ids=as_list(output.token_ids),
|
||||||
finish_reason=output.finish_reason,
|
finish_reason=output.finish_reason,
|
||||||
is_streaming=True,
|
is_streaming=True,
|
||||||
delta=True,
|
delta=True,
|
||||||
|
|||||||
@ -44,7 +44,7 @@ class ReasoningParser:
|
|||||||
return self.model_tokenizer.get_vocab()
|
return self.model_tokenizer.get_vocab()
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
|
def is_reasoning_end(self, input_ids: list[int]) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if the reasoning content ends in the input_ids.
|
Check if the reasoning content ends in the input_ids.
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user