diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py
index ae42b33f80f88..cb5ad99cfbdf7 100644
--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@@ -21,8 +21,8 @@ from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
 if TYPE_CHECKING:
     import outlines_core as oc
+    import transformers.convert_slow_tokenizer as convert_slow_tokenizer
     import transformers.file_utils as file_utils
-    import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2
     import xgrammar as xgr
 
     from vllm.tokenizers import TokenizerLike
     from vllm.v1.worker.gpu_input_batch import InputBatch
@@ -30,10 +30,8 @@ else:
     xgr = LazyLoader("xgr", globals(), "xgrammar")
     oc = LazyLoader("oc", globals(), "outlines_core")
     file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils")
-    tokenization_gpt2 = LazyLoader(
-        "tokenization_gpt2",
-        globals(),
-        "transformers.models.gpt2.tokenization_gpt2",
+    convert_slow_tokenizer = LazyLoader(
+        "convert_slow_tokenizer", globals(), "transformers.convert_slow_tokenizer"
     )
 
     TokenizerLike = object
@@ -204,7 +202,9 @@ def _reduced_vocabulary(
         A Dict of token string -> equivalent token ids
     """
 
-    unicode_to_bytes = {v: k for k, v in tokenization_gpt2.bytes_to_unicode().items()}
+    unicode_to_bytes = {
+        v: k for k, v in convert_slow_tokenizer.bytes_to_unicode().items()
+    }
 
     def convert_token_to_string(token: str) -> str:
         string = tokenizer.convert_tokens_to_string([token])