Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-09 22:25:32 +08:00)
[Bugfix]: Fix messy code when using logprobs (#19209)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent af107d5a0e
commit 93b9d9f499
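Here "messy code" means garbled token strings (mojibake): when logprobs are requested, per-token strings were built with tokenizer.convert_ids_to_tokens, which returns raw byte-level BPE vocabulary pieces ('Ġworld' for ' world', byte artifacts for non-ASCII text) instead of decoded text. A minimal sketch of where this surfaces, assuming vLLM is installed and using the same Qwen model as the test added below; the decoded_token field is from vllm.sequence.Logprob:

# Sketch: how garbled per-token strings show up when requesting logprobs.
# Assumes vLLM is installed and "Qwen/Qwen2.5-1.5B-Instruct" is available
# locally; the model choice mirrors the test added in this commit.
from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct")
params = SamplingParams(max_tokens=8, logprobs=3)
out = llm.generate(["Hello, world!"], params)[0]

# Each generated position maps candidate token ids to Logprob objects;
# before this fix, decoded_token could contain raw BPE pieces such as
# "Ġworld" (or mojibake for multi-byte characters) instead of " world".
for pos in (out.outputs[0].logprobs or []):
    for token_id, lp in pos.items():
        print(token_id, repr(lp.decoded_token), lp.logprob)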
@@ -14,9 +14,12 @@ from unittest.mock import patch
 import pytest
 import torch
 import zmq
+from transformers import AutoTokenizer
 from vllm_test_utils.monitor import monitor

 from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
+from vllm.transformers_utils.detokenizer_utils import (
+    convert_ids_list_to_tokens)
 from vllm.utils import (CacheInfo, FlexibleArgumentParser, LRUCache,
                         MemorySnapshot, PlaceholderModule, StoreBoolean,
                         bind_kv_cache, common_broadcastable_dtype,
@@ -918,3 +921,14 @@ def test_split_host_port():
 def test_join_host_port():
     assert join_host_port("127.0.0.1", 5555) == "127.0.0.1:5555"
     assert join_host_port("::1", 5555) == "[::1]:5555"
+
+
+def test_convert_ids_list_to_tokens():
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
+    token_ids = tokenizer.encode("Hello, world!")
+    # token_ids = [9707, 11, 1879, 0]
+    assert tokenizer.convert_ids_to_tokens(token_ids) == [
+        'Hello', ',', 'Ġworld', '!'
+    ]
+    tokens = convert_ids_list_to_tokens(tokenizer, token_ids)
+    assert tokens == ['Hello', ',', ' world', '!']
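The new test pins down the contrast for byte-level BPE tokenizers: convert_ids_to_tokens returns vocabulary surface forms ('Ġ' standing in for a leading space), while decoding each id restores readable text. The contrast is even sharper on non-ASCII input, where the vocabulary pieces are outright mojibake. A short sketch, assuming only transformers is installed; the Chinese example string is mine, not from the test:

# Contrast raw vocabulary pieces with per-id decoding; assumes the
# `transformers` package and the same Qwen tokenizer as the test above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")

for text in ["Hello, world!", "你好，世界"]:
    ids = tokenizer.encode(text)
    # Byte-level BPE pieces: 'Ġ' marks a leading space, and non-ASCII
    # characters appear as byte artifacts (e.g. 'ä½ł' for '你').
    print(tokenizer.convert_ids_to_tokens(ids))
    # Per-id decode restores readable text; a lone '�' can still appear
    # if a single token covers only part of a multi-byte character.
    print([tokenizer.decode([i]) for i in ids])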
@@ -35,7 +35,7 @@ def _ref_convert_id_to_token(
     Returns:
         String representation of input token id
     """
-    return tokenizer.convert_ids_to_tokens(token_id) or ""
+    return tokenizer.decode([token_id]) or ""


 @pytest.mark.parametrize(
@@ -78,6 +78,7 @@ def convert_prompt_ids_to_tokens(
 def convert_ids_list_to_tokens(
     tokenizer: AnyTokenizer,
     token_ids: list[int],
+    skip_special_tokens: bool = False,
 ) -> list[str]:
     """Detokenize the input ids individually.

@@ -89,8 +90,15 @@ def convert_ids_list_to_tokens(
         Python list of token string representations

     """
-    token_str_lst = tokenizer.convert_ids_to_tokens(token_ids)
-    _replace_none_with_empty(token_str_lst)  # type: ignore
+    token_str_lst = []
+    for token_id in token_ids:
+        token_str = tokenizer.decode(
+            [token_id],
+            skip_special_tokens=skip_special_tokens,
+        )
+        if token_str is None:
+            token_str = ""
+        token_str_lst.append(token_str)
     return token_str_lst


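Net effect: one decode call per token id replaces the single vocabulary lookup, and the new skip_special_tokens flag is threaded through to each call. A self-contained sketch of the resulting behavior, assuming a Hugging Face tokenizer (the real helper is typed against vLLM's AnyTokenizer):

# Minimal standalone sketch of the fixed helper, assuming a Hugging Face
# tokenizer; the real function is typed against vllm's AnyTokenizer.
from transformers import AutoTokenizer


def convert_ids_list_to_tokens(tokenizer,
                               token_ids: list[int],
                               skip_special_tokens: bool = False) -> list[str]:
    """Decode each id on its own so spaces and non-ASCII bytes render cleanly."""
    token_str_lst: list[str] = []
    for token_id in token_ids:
        token_str = tokenizer.decode([token_id],
                                     skip_special_tokens=skip_special_tokens)
        # decode should return a string, but guard against None the way
        # the original code did for odd tokenizer implementations.
        token_str_lst.append(token_str if token_str is not None else "")
    return token_str_lst


tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
ids = tokenizer.encode("Hello, world!")
print(convert_ids_list_to_tokens(tokenizer, ids))
# ['Hello', ',', ' world', '!'], matching the test above

# With skip_special_tokens=True, special ids (e.g. the EOS token) decode to "".
eos = tokenizer.eos_token_id
print(convert_ids_list_to_tokens(tokenizer, [eos], skip_special_tokens=True))

The trade-off is one tokenizer.decode call per id instead of a single batched lookup, but decode runs the tokenizer's byte decoder, which is exactly what turns 'Ġworld' back into ' world'.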