Update where bytes_to_unicode is imported from (#30771)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-12-16 16:05:01 +00:00 (committed by GitHub)
Commit: e1625498f4 (parent 0b0acc758e)

@@ -21,8 +21,8 @@ from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
 if TYPE_CHECKING:
     import outlines_core as oc
     import transformers.file_utils as file_utils
-    import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2
     import xgrammar as xgr
+    from transformers.convert_slow_tokenizer import bytes_to_unicode

     from vllm.tokenizers import TokenizerLike
     from vllm.v1.worker.gpu_input_batch import InputBatch
@@ -30,10 +30,8 @@ else:
     xgr = LazyLoader("xgr", globals(), "xgrammar")
     oc = LazyLoader("oc", globals(), "outlines_core")
     file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils")
-    tokenization_gpt2 = LazyLoader(
-        "tokenization_gpt2",
-        globals(),
-        "transformers.models.gpt2.tokenization_gpt2",
-    )
+    bytes_to_unicode = LazyLoader(
+        "bytes_to_unicode", globals(), "transformers.convert_slow_tokenizer"
+    )

     TokenizerLike = object
@@ -204,7 +202,7 @@ def _reduced_vocabulary(
         A Dict of token string -> equivalent token ids
     """
-    unicode_to_bytes = {v: k for k, v in tokenization_gpt2.bytes_to_unicode().items()}
+    unicode_to_bytes = {v: k for k, v in bytes_to_unicode().items()}

     def convert_token_to_string(token: str) -> str:
         string = tokenizer.convert_tokens_to_string([token])
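
For reference, a minimal sketch (not part of the commit) of what the relocated helper returns and why _reduced_vocabulary inverts its mapping. It assumes a transformers release that exposes bytes_to_unicode in convert_slow_tokenizer; the example token is illustrative only.

# Sketch only: mirrors how _reduced_vocabulary uses the relocated helper.
from transformers.convert_slow_tokenizer import bytes_to_unicode

# bytes_to_unicode() maps every byte value 0-255 to a printable unicode
# character, the encoding used by byte-level BPE (GPT-2 style) vocabularies.
byte_to_char = bytes_to_unicode()

# _reduced_vocabulary needs the inverse: token characters back to raw bytes.
unicode_to_bytes = {v: k for k, v in byte_to_char.items()}

# Illustrative example: the leading "Ġ" in a GPT-2 token encodes a space byte.
token = "Ġhello"
raw_bytes = bytes(unicode_to_bytes[c] for c in token)
print(raw_bytes)  # b' hello'

The mapping itself is unchanged by the commit; only the import path moves, so the inverted dictionary built above is the same either way.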