[Chore] Use tokenizer.encode and tokenizer.decode directly (#29851)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Cyrus Leung 2025-12-02 20:30:40 +08:00 committed by GitHub
parent 951445a52d
commit 68ffbca7e4
9 changed files with 36 additions and 40 deletions
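
At a glance: call sites that previously went through the module-level `encode_tokens`/`decode_tokens` helpers in `vllm.transformers_utils.tokenizer` now call the `TokenizerLike` methods directly, and the helpers remain only as deprecated shims. A minimal before/after sketch of the pattern (illustrative only; `tok` stands for any `TokenizerLike` instance):

    # Before: module-level helpers that forwarded to the tokenizer
    ids = encode_tokens(tok, "Hello world", add_special_tokens=False)
    text = decode_tokens(tok, ids, skip_special_tokens=True)

    # After: the tokenizer's own methods
    ids = tok.encode("Hello world", add_special_tokens=False)
    text = tok.decode(ids, skip_special_tokens=True)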

View File

@@ -22,8 +22,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
 from vllm.multimodal.cache import MultiModalProcessorOnlyCache
 from vllm.multimodal.inputs import MultiModalInputs
 from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
-from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
-from vllm.transformers_utils.tokenizer import encode_tokens
+from vllm.tokenizers import (
+    MistralTokenizer,
+    TokenizerLike,
+    cached_tokenizer_from_config,
+)

 from ....multimodal.utils import random_audio, random_image, random_video
 from ...registry import (
@@ -151,7 +154,7 @@ def get_text_token_prompts(
     mm_data: MultiModalDataDict,
 ):
     dummy_inputs = processor.dummy_inputs
-    tokenizer = processor.info.get_tokenizer()
+    tokenizer: TokenizerLike = processor.info.get_tokenizer()

     model_config = processor.info.ctx.model_config
     model_type = model_config.hf_config.model_type
@@ -188,10 +191,9 @@ def get_text_token_prompts(
         assert isinstance(inputs.prompt, str)

         text_prompt = inputs.prompt
-        token_prompt = encode_tokens(
-            tokenizer,
+        token_prompt = tokenizer.encode(
             text_prompt,
-            add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type),
+            add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
         )

     return text_prompt, token_prompt
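
Note the new fallback in `.get(model_type, True)`: the old `encode_tokens` helper accepted `add_special_tokens=None` and left the decision to the tokenizer's own default, whereas `tokenizer.encode` is now passed a concrete bool, so models without an override fall back to `True` explicitly. A small sketch of the lookup (the dict contents here are hypothetical):

    _ADD_SPECIAL_TOKENS_OVERRIDES = {"mllama": False}  # hypothetical example entry
    _ADD_SPECIAL_TOKENS_OVERRIDES.get("mllama", True)  # -> False (explicit override)
    _ADD_SPECIAL_TOKENS_OVERRIDES.get("llama", True)   # -> True (default fallback)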

View File

@@ -5,7 +5,6 @@
 import pytest

 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.transformers_utils.tokenizer import encode_tokens

 from ....conftest import ImageTestAssets
 from ...utils import build_model_context
@@ -48,7 +47,7 @@ def test_processor_override(
         ]
     }
     if tokenized_prompt:
-        prompt = encode_tokens(tokenizer, prompt)
+        prompt = tokenizer.encode(prompt)

     processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
     mm_data = processed_inputs["mm_kwargs"].get_data()

View File

@@ -37,7 +37,7 @@ from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.import_utils import PlaceholderModule

 try:

View File

@@ -33,7 +33,7 @@ class RenderConfig:
     `0` yields an empty list (and skips embeds).
     `-1` maps to `model_config.max_model_len`."""

-    add_special_tokens: bool | None = True
+    add_special_tokens: bool = True
     """Whether to add model-specific special tokens during tokenization."""

     cache_salt: str | None = None
@@ -315,7 +315,7 @@ class CompletionRenderer(BaseRenderer):
         text: str,
         max_length: int | None,
         truncate_prompt_tokens: int | None,
-        add_special_tokens: bool | None,
+        add_special_tokens: bool,
         cache_salt: str | None,
     ) -> EngineTokensPrompt:
         """Tokenize text input asynchronously."""

View File

@@ -19,7 +19,7 @@ from vllm.inputs import TokensPrompt
 from vllm.model_executor.models.interfaces import supports_score_template
 from vllm.multimodal.inputs import MultiModalDataDict
 from vllm.outputs import PoolingRequestOutput
-from vllm.transformers_utils.tokenizer import TokenizerLike
+from vllm.tokenizers import TokenizerLike

 ScoreContentPartParam: TypeAlias = (
     ChatCompletionContentPartImageParam | ChatCompletionContentPartImageEmbedsParam

View File

@@ -75,7 +75,6 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from vllm.transformers_utils.configs.radio import RadioConfig
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .utils import _merge_multimodal_embeddings
@@ -454,14 +453,12 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):

         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
         )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
-        )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
         )

     @property
@@ -1179,14 +1176,12 @@ class NemotronH_Nano_VL_V2(
         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
         tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
         )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
-        )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
         )

     def pixel_shuffle(self, x, scale_factor=0.5):
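
Both classes pre-encode their image-delimiter strings once at construction and reuse the resulting id lists, instead of re-tokenizing them on every request. A standalone sketch of the same caching pattern, assuming any tokenizer with an HF-style `encode` (the class name and token strings below are made up for illustration):

    class VideoPromptBuilder:
        def __init__(self, tokenizer) -> None:
            # Encode the fixed delimiters once; reuse the ids for every request
            self._start_ids = tokenizer.encode("<img>", add_special_tokens=False)
            self._end_ids = tokenizer.encode("</img>", add_special_tokens=False)

        def wrap(self, content_ids: list[int]) -> list[int]:
            # Surround already-tokenized content with the cached delimiter ids
            return self._start_ids + content_ids + self._end_ids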

View File

@@ -88,7 +88,6 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .interfaces import (
@@ -591,7 +590,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
                 tokenization_kwargs=tokenization_kwargs,
             )
             tokenizer = self.info.get_tokenizer()
-            prompt_ids = encode_tokens(tokenizer, prompt)
+            prompt_ids = tokenizer.encode(prompt)
         else:
             prompt_ids = self._apply_hf_processor_tokens_only(prompt)


View File

@@ -25,7 +25,6 @@ from typing_extensions import TypeVar, assert_never
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.transformers_utils.processor import cached_processor_from_config
-from vllm.transformers_utils.tokenizer import decode_tokens, encode_tokens
 from vllm.utils.collection_utils import flatten_2d_lists, full_groupby
 from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
 from vllm.utils.jsontree import JSONTree, json_map_leaves
@@ -80,9 +79,9 @@ def _cached_encode(
     tokenizer: TokenizerLike,
     text: str,
     *,
-    add_special_tokens: bool | None = None,
+    add_special_tokens: bool = True,
 ) -> list[int]:
-    return encode_tokens(tokenizer, text, add_special_tokens=add_special_tokens)
+    return tokenizer.encode(text, add_special_tokens=add_special_tokens)


 @lru_cache(maxsize=2048)
@@ -90,11 +89,9 @@ def _cached_decode(
     tokenizer: TokenizerLike,
     token_ids: tuple[int, ...],
     *,
-    skip_special_tokens: bool | None = None,
+    skip_special_tokens: bool = False,
 ) -> str:
-    return decode_tokens(
-        tokenizer, list(token_ids), skip_special_tokens=skip_special_tokens
-    )
+    return tokenizer.decode(list(token_ids), skip_special_tokens=skip_special_tokens)


 def _seq2text(
@@ -110,7 +107,7 @@ def _seq2text(
         raise ValueError("You cannot decode tokens when `skip_tokenizer_init=True`")

     if not use_cache:
-        return decode_tokens(tokenizer, seq)
+        return tokenizer.decode(seq)

     return _cached_decode(tokenizer, tuple(seq))
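
`_cached_decode` takes `tuple[int, ...]` rather than `list[int]` because `functools.lru_cache` hashes its arguments; callers convert with `tuple(seq)` and the wrapper converts back to a list before decoding. The same constraint in isolation (a generic sketch, not vLLM code):

    from functools import lru_cache

    @lru_cache(maxsize=2048)
    def cached_sum(items: tuple[int, ...]) -> int:
        # Passing a list here would raise TypeError: unhashable type
        return sum(items)

    seq = [1, 2, 3]
    cached_sum(tuple(seq))  # computed once
    cached_sum(tuple(seq))  # served from the cache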
@@ -126,7 +123,7 @@ def _seq2tokens(
         raise ValueError("You cannot encode text when `skip_tokenizer_init=True`")

     if not use_cache:
-        return encode_tokens(tokenizer, seq, add_special_tokens=False)
+        return tokenizer.encode(seq, add_special_tokens=False)

     return _cached_encode(tokenizer, seq, add_special_tokens=False)
@@ -2198,8 +2195,8 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
         tokenizer = self.info.get_tokenizer()
         decoder_prompt_raw = self.create_decoder_prompt(prompt, mm_data)
         if isinstance(decoder_prompt_raw, str):
-            decoder_prompt_ids = encode_tokens(
-                tokenizer, decoder_prompt_raw, add_special_tokens=False
+            decoder_prompt_ids = tokenizer.encode(
+                decoder_prompt_raw, add_special_tokens=False
             )
         else:
             decoder_prompt_ids = decoder_prompt_raw

View File

@@ -4,6 +4,8 @@
 import warnings
 from typing import Any

+from typing_extensions import deprecated
+
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
@@ -73,6 +75,7 @@ def __getattr__(name: str):
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


+@deprecated("Will be removed in v0.13. Please use `tokenizer.decode()` instead.")
 def decode_tokens(
     tokenizer: TokenizerLike,
     token_ids: list[int],
@@ -94,6 +97,7 @@ def decode_tokens(
     return tokenizer.decode(token_ids, **kw_args)


+@deprecated("Will be removed in v0.13. Please use `tokenizer.encode()` instead.")
 def encode_tokens(
     tokenizer: TokenizerLike,
     text: str,
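
The old helpers stay importable for now but are marked with `typing_extensions.deprecated`, which both flags uses for type checkers and emits a `DeprecationWarning` when the function is called. A simplified sketch of the shim pattern, mirroring the conditional kwarg forwarding visible in `decode_tokens` above:

    from typing_extensions import deprecated

    @deprecated("Will be removed in v0.13. Please use `tokenizer.encode()` instead.")
    def encode_tokens(tokenizer, text: str, *, add_special_tokens: bool | None = None) -> list[int]:
        kw_args = {}
        if add_special_tokens is not None:
            # Only forward the kwarg when the caller set it explicitly
            kw_args["add_special_tokens"] = add_special_tokens
        return tokenizer.encode(text, **kw_args)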