mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:44:57 +08:00
[Frontend] Avoid list copies in serving_chat.py (#22947)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
fbd88728b3
commit
f6b5040590
@ -50,6 +50,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
|
||||
from vllm.transformers_utils.tokenizers import (maybe_serialize_tool_calls,
|
||||
truncate_tool_call_ids,
|
||||
validate_request_params)
|
||||
from vllm.utils import as_list
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@ -670,10 +671,10 @@ class OpenAIServingChat(OpenAIServing):
|
||||
|
||||
# avoid the None + list error.
|
||||
if previous_token_ids:
|
||||
current_token_ids = previous_token_ids + list(
|
||||
current_token_ids = previous_token_ids + as_list(
|
||||
output.token_ids)
|
||||
else:
|
||||
current_token_ids = list(output.token_ids)
|
||||
current_token_ids = as_list(output.token_ids)
|
||||
|
||||
if self.use_harmony:
|
||||
if is_final:
|
||||
@ -703,11 +704,10 @@ class OpenAIServingChat(OpenAIServing):
|
||||
# set reasoning status to end.
|
||||
# Only keep 'content', remove 'reasoning_content'.
|
||||
if reasoning_parser.is_reasoning_end(
|
||||
list(output.token_ids)) or \
|
||||
(res.prompt_token_ids and
|
||||
reasoning_parser.is_reasoning_end(
|
||||
list(res.prompt_token_ids)
|
||||
)):
|
||||
as_list(output.token_ids)) or (
|
||||
res.prompt_token_ids
|
||||
and reasoning_parser.is_reasoning_end(
|
||||
res.prompt_token_ids)):
|
||||
reasoning_end_arr[i] = True
|
||||
if delta_message and delta_message.content:
|
||||
# This needs to be added to the next `delta_text`
|
||||
@ -771,6 +771,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
assert reasoning_parser is not None
|
||||
assert added_content_delta_arr is not None
|
||||
assert reasoning_end_arr is not None
|
||||
output_token_ids = as_list(output.token_ids)
|
||||
if not reasoning_end_arr[i]:
|
||||
delta_message = (
|
||||
reasoning_parser.
|
||||
@ -780,7 +781,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
delta_text,
|
||||
previous_token_ids,
|
||||
current_token_ids,
|
||||
output.token_ids,
|
||||
output_token_ids,
|
||||
))
|
||||
# When encountering think end id in prompt_token_ids
|
||||
# i.e. {"enable_thinking": False},
|
||||
@ -789,9 +790,9 @@ class OpenAIServingChat(OpenAIServing):
|
||||
# to 'reasoning_content'.
|
||||
if res.prompt_token_ids and \
|
||||
reasoning_parser.is_reasoning_end(
|
||||
list(res.prompt_token_ids)):
|
||||
res.prompt_token_ids):
|
||||
reasoning_end_arr[i] = True
|
||||
current_token_ids = list(output.token_ids)
|
||||
current_token_ids = output_token_ids
|
||||
if delta_message and delta_message.content:
|
||||
current_text = delta_message.content
|
||||
delta_message.content = None
|
||||
@ -802,11 +803,11 @@ class OpenAIServingChat(OpenAIServing):
|
||||
# Remove the text and token ids related
|
||||
# to 'reasoning_content'.
|
||||
if reasoning_parser.is_reasoning_end(
|
||||
list(output.token_ids)):
|
||||
output_token_ids):
|
||||
reasoning_end_arr[i] = True
|
||||
current_token_ids = \
|
||||
reasoning_parser.extract_content_ids(
|
||||
list(output.token_ids))
|
||||
output_token_ids)
|
||||
if delta_message and delta_message.content:
|
||||
current_text = delta_message.content
|
||||
delta_message.content = None
|
||||
@ -815,7 +816,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
|
||||
# handle tool calls only after reasoning is done,
|
||||
else:
|
||||
delta_token_ids = list(output.token_ids)
|
||||
delta_token_ids = output_token_ids
|
||||
# First time to tool call,
|
||||
# add the remaining text and token ids
|
||||
# to delta from previous
|
||||
@ -899,7 +900,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
self.request_logger.log_outputs(
|
||||
request_id=request_id,
|
||||
outputs=delta_content,
|
||||
output_token_ids=list(output.token_ids),
|
||||
output_token_ids=as_list(output.token_ids),
|
||||
finish_reason=output.finish_reason,
|
||||
is_streaming=True,
|
||||
delta=True,
|
||||
|
||||
@ -44,7 +44,7 @@ class ReasoningParser:
|
||||
return self.model_tokenizer.get_vocab()
|
||||
|
||||
@abstractmethod
|
||||
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
|
||||
def is_reasoning_end(self, input_ids: list[int]) -> bool:
|
||||
"""
|
||||
Check if the reasoning content ends in the input_ids.
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user