Update where bytes_to_unicode is imported from (#30771)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 0b0acc758e
commit e1625498f4
@@ -21,8 +21,8 @@ from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
 if TYPE_CHECKING:
     import outlines_core as oc
     import transformers.file_utils as file_utils
-    import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2
     import xgrammar as xgr
+    from transformers.convert_slow_tokenizer import bytes_to_unicode

     from vllm.tokenizers import TokenizerLike
     from vllm.v1.worker.gpu_input_batch import InputBatch
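These imports sit behind a TYPE_CHECKING guard, so they are evaluated only by static type checkers and never executed at runtime. A minimal sketch of the pattern, using the import this commit adds:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by mypy/pyright for annotations; never executed at runtime,
    # so the heavyweight transformers import adds no startup cost.
    from transformers.convert_slow_tokenizer import bytes_to_unicode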
@@ -30,10 +30,8 @@ else:
     xgr = LazyLoader("xgr", globals(), "xgrammar")
     oc = LazyLoader("oc", globals(), "outlines_core")
     file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils")
-    tokenization_gpt2 = LazyLoader(
-        "tokenization_gpt2",
-        globals(),
-        "transformers.models.gpt2.tokenization_gpt2",
+    bytes_to_unicode = LazyLoader(
+        "bytes_to_unicode", globals(), "transformers.convert_slow_tokenizer"
     )

     TokenizerLike = object
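At runtime the same names are instead bound through LazyLoader, which defers the real import until the name is first used. A minimal sketch of how such a loader can work, following the common TensorFlow-style pattern; this is an assumption for illustration, not necessarily vLLM's exact implementation:

import importlib
import types


class LazyLoader(types.ModuleType):
    """Module proxy that defers the real import until first attribute access."""

    def __init__(self, local_name: str, parent_globals: dict, module_name: str):
        super().__init__(module_name)
        self._local_name = local_name
        self._parent_globals = parent_globals

    def _load(self) -> types.ModuleType:
        module = importlib.import_module(self.__name__)
        # Swap the proxy for the real module in the caller's namespace so
        # later lookups skip this indirection entirely.
        self._parent_globals[self._local_name] = module
        self.__dict__.update(module.__dict__)
        return module

    def __getattr__(self, item: str):
        # Only reached when normal attribute lookup fails, i.e. before _load.
        return getattr(self._load(), item)


xgr = LazyLoader("xgr", globals(), "xgrammar")  # nothing imported yet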
@@ -204,7 +202,7 @@ def _reduced_vocabulary(
         A Dict of token string -> equivalent token ids
     """

-    unicode_to_bytes = {v: k for k, v in tokenization_gpt2.bytes_to_unicode().items()}
+    unicode_to_bytes = {v: k for k, v in bytes_to_unicode().items()}

     def convert_token_to_string(token: str) -> str:
         string = tokenizer.convert_tokens_to_string([token])
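For context, bytes_to_unicode returns the GPT-2 byte-level BPE alphabet: a dict mapping each of the 256 byte values to a printable unicode character, and the inverted dict built above turns token strings back into raw bytes. A small usage sketch; the token value is a hypothetical example for illustration:

from transformers.convert_slow_tokenizer import bytes_to_unicode

byte_encoder = bytes_to_unicode()           # {byte value -> unicode char}
unicode_to_bytes = {v: k for k, v in byte_encoder.items()}  # inverse, as in the diff

assert len(byte_encoder) == 256             # every byte gets a printable char
assert byte_encoder[32] == "Ġ"              # space (0x20) is remapped to U+0120

# Recover the raw bytes behind a byte-level BPE token string:
token = "Ġhello"                            # hypothetical token for illustration
raw = bytes(unicode_to_bytes[ch] for ch in token)
print(raw)                                  # b' hello'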