Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-17 12:26:27 +08:00)
Fix handling of special tokens in decoding. (#418)
parent 51be365143
commit c6dfc3cdbe
@@ -276,6 +276,7 @@ class LLMEngine:
                     seq.get_last_token_id(),
                     skip_special_tokens=True,
                 )
+                if new_token is not None:
                     seq.output_tokens.append(new_token)
                     seq.output_text = new_output_text
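With this hunk, the engine only records a token when `detokenize_incrementally` actually produced one; when the new token id is a special token and `skip_special_tokens=True`, the function now returns `None` (see the hunks below) and the sequence output is left untouched. Below is a minimal, dependency-free sketch of that calling pattern; `Seq` and `decode_step` are illustrative stand-ins, not vLLM's real classes or functions.

# Minimal sketch of the guarded append above; `Seq` and `decode_step` are
# illustrative stand-ins, not vLLM's actual API.
from typing import List, Optional, Tuple


class Seq:
    def __init__(self) -> None:
        self.output_tokens: List[str] = []
        self.output_text: str = ""


def decode_step(prev_text: str, token_id: int,
                special_ids: List[int]) -> Tuple[Optional[str], str]:
    # Mirrors the fixed contract: special ids yield (None, prev_text) so the
    # caller can skip the append; otherwise return the token and the new text.
    if token_id in special_ids:
        return None, prev_text
    token = f"<tok{token_id}>"
    return token, prev_text + token


seq = Seq()
for token_id in (5, 2, 7):                      # pretend 2 is a special (EOS) id
    new_token, new_output_text = decode_step(seq.output_text, token_id, [2])
    if new_token is not None:                   # the guard added in this commit
        seq.output_tokens.append(new_token)
        seq.output_text = new_output_text

print(seq.output_tokens, seq.output_text)       # ['<tok5>', '<tok7>'] <tok5><tok7>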
@@ -80,6 +80,8 @@ def detokenize_incrementally(
         new_token: The new token as a string.
         output_text: The new output text as a string.
     """
+    if skip_special_tokens and (new_token_id in tokenizer.all_special_ids):
+        return None, prev_output_tokens
     new_token = tokenizer.convert_ids_to_tokens(
         new_token_id, skip_special_tokens=skip_special_tokens)
     output_tokens = prev_output_tokens + [new_token]
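The two added lines short-circuit before any id-to-string conversion when the incoming id is a special token and `skip_special_tokens=True`. The sketch below illustrates that control flow against a Hugging Face tokenizer; "gpt2" is only an example model, and `decode_new_token` is a simplified stand-in, not the full vLLM function.

# Sketch of the early return added above; simplified, not vLLM's full function.
from typing import List, Optional, Tuple

from transformers import AutoTokenizer, PreTrainedTokenizerBase


def decode_new_token(
    tokenizer: PreTrainedTokenizerBase,
    prev_output_tokens: List[str],
    new_token_id: int,
    skip_special_tokens: bool,
) -> Tuple[Optional[str], List[str]]:
    # Drop BOS/EOS/PAD and friends before doing any detokenization work.
    if skip_special_tokens and (new_token_id in tokenizer.all_special_ids):
        return None, prev_output_tokens
    new_token = tokenizer.convert_ids_to_tokens(new_token_id)
    return new_token, prev_output_tokens + [new_token]


tokenizer = AutoTokenizer.from_pretrained("gpt2")            # example model only
hello_id = tokenizer.convert_tokens_to_ids("hello")

print(decode_new_token(tokenizer, [], tokenizer.eos_token_id, True))  # (None, [])
print(decode_new_token(tokenizer, [], hello_id, True))       # ('hello', ['hello'])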
@@ -99,7 +101,7 @@ def detokenize_incrementally(
     sub_texts = []
     current_sub_text = []
     for token in output_tokens:
-        if skip_special_tokens and token in tokenizer.all_special_ids:
+        if skip_special_tokens and token in tokenizer.all_special_tokens:
             continue
         if token in tokenizer.added_tokens_encoder:
             if current_sub_text:
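The one-line change in this loop fixes a type mismatch: `token` is a string produced by `convert_ids_to_tokens`, while `all_special_ids` holds integer ids, so the old membership test never matched and special tokens could leak into the joined output text. `all_special_tokens` holds the string forms, which is what the loop actually has in hand. A small illustration with a Hugging Face tokenizer (again, "gpt2" is just an example model):

# Why the membership test changed: comparing a token *string* against integer
# ids never matches, so the special token was silently kept in the output.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")            # example model only
token = tokenizer.convert_ids_to_tokens(tokenizer.eos_token_id)

print(token)                                  # <|endoftext|>
print(token in tokenizer.all_special_ids)     # False: str vs. int ids
print(token in tokenizer.all_special_tokens)  # True: string forms match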