mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-01-23 12:24:29 +08:00)
[Chore] Use tokenizer.encode and tokenizer.decode directly (#29851)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in: parent 951445a52d, commit 68ffbca7e4
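The change is mechanical: everywhere vLLM previously went through the module-level `encode_tokens`/`decode_tokens` helpers, it now calls the tokenizer's own `encode`/`decode` methods. A minimal before/after sketch of the pattern, assuming a Hugging Face tokenizer loaded with `transformers.AutoTokenizer` ("gpt2" is just an example checkpoint; the old helpers, as the diff below shows, simply forwarded to these methods):

# Minimal before/after sketch of this commit's pattern.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Before: module-level helpers that wrapped the tokenizer.
#   token_ids = encode_tokens(tokenizer, "hello world", add_special_tokens=False)
#   text = decode_tokens(tokenizer, token_ids, skip_special_tokens=True)

# After: call the tokenizer methods directly.
token_ids = tokenizer.encode("hello world", add_special_tokens=False)
text = tokenizer.decode(token_ids, skip_special_tokens=True)
print(token_ids, text)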
@@ -22,8 +22,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
 from vllm.multimodal.cache import MultiModalProcessorOnlyCache
 from vllm.multimodal.inputs import MultiModalInputs
 from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
-from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
-from vllm.transformers_utils.tokenizer import encode_tokens
+from vllm.tokenizers import (
+    MistralTokenizer,
+    TokenizerLike,
+    cached_tokenizer_from_config,
+)
 
 from ....multimodal.utils import random_audio, random_image, random_video
 from ...registry import (
@@ -151,7 +154,7 @@ def get_text_token_prompts(
     mm_data: MultiModalDataDict,
 ):
     dummy_inputs = processor.dummy_inputs
-    tokenizer = processor.info.get_tokenizer()
+    tokenizer: TokenizerLike = processor.info.get_tokenizer()
     model_config = processor.info.ctx.model_config
 
     model_type = model_config.hf_config.model_type
@@ -188,10 +191,9 @@ def get_text_token_prompts(
     assert isinstance(inputs.prompt, str)
 
     text_prompt = inputs.prompt
-    token_prompt = encode_tokens(
-        tokenizer,
+    token_prompt = tokenizer.encode(
         text_prompt,
-        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type),
+        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
     )
 
     return text_prompt, token_prompt
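Note the second argument added to the override lookup above: `dict.get` returns `None` for missing keys, which the old `encode_tokens` helper tolerated (`add_special_tokens: bool | None`), whereas `tokenizer.encode` now receives an explicit `True` fallback. A toy illustration (the mapping below is made up, not the real `_ADD_SPECIAL_TOKENS_OVERRIDES` table):

# Toy mapping standing in for _ADD_SPECIAL_TOKENS_OVERRIDES.
overrides = {"model_a": False}

print(overrides.get("model_a"))        # False: explicit override
print(overrides.get("model_b"))        # None: old behaviour for missing keys
print(overrides.get("model_b", True))  # True: the new explicit default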
@@ -5,7 +5,6 @@
 import pytest
 
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.transformers_utils.tokenizer import encode_tokens
 
 from ....conftest import ImageTestAssets
 from ...utils import build_model_context
@@ -48,7 +47,7 @@ def test_processor_override(
         ]
     }
     if tokenized_prompt:
-        prompt = encode_tokens(tokenizer, prompt)
+        prompt = tokenizer.encode(prompt)
 
     processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
     mm_data = processed_inputs["mm_kwargs"].get_data()
@@ -37,7 +37,7 @@ from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.import_utils import PlaceholderModule
 
 try:
@@ -33,7 +33,7 @@ class RenderConfig:
     `0` yields an empty list (and skips embeds).
     `-1` maps to `model_config.max_model_len`."""
 
-    add_special_tokens: bool | None = True
+    add_special_tokens: bool = True
     """Whether to add model-specific special tokens during tokenization."""
 
     cache_salt: str | None = None
@@ -315,7 +315,7 @@ class CompletionRenderer(BaseRenderer):
         text: str,
         max_length: int | None,
         truncate_prompt_tokens: int | None,
-        add_special_tokens: bool | None,
+        add_special_tokens: bool,
         cache_salt: str | None,
     ) -> EngineTokensPrompt:
         """Tokenize text input asynchronously."""
@@ -19,7 +19,7 @@ from vllm.inputs import TokensPrompt
 from vllm.model_executor.models.interfaces import supports_score_template
 from vllm.multimodal.inputs import MultiModalDataDict
 from vllm.outputs import PoolingRequestOutput
-from vllm.transformers_utils.tokenizer import TokenizerLike
+from vllm.tokenizers import TokenizerLike
 
 ScoreContentPartParam: TypeAlias = (
     ChatCompletionContentPartImageParam | ChatCompletionContentPartImageEmbedsParam
@@ -75,7 +75,6 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from vllm.transformers_utils.configs.radio import RadioConfig
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .utils import _merge_multimodal_embeddings
@@ -454,14 +453,12 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
 
         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
         )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
-        )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
         )
 
     @property
@@ -1179,14 +1176,12 @@ class NemotronH_Nano_VL_V2(
         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
         tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
         )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
-        )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
         )
 
     def pixel_shuffle(self, x, scale_factor=0.5):
@@ -88,7 +88,6 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .interfaces import (
@@ -591,7 +590,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
                 tokenization_kwargs=tokenization_kwargs,
             )
             tokenizer = self.info.get_tokenizer()
-            prompt_ids = encode_tokens(tokenizer, prompt)
+            prompt_ids = tokenizer.encode(prompt)
         else:
             prompt_ids = self._apply_hf_processor_tokens_only(prompt)
 
@@ -25,7 +25,6 @@ from typing_extensions import TypeVar, assert_never
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.transformers_utils.processor import cached_processor_from_config
-from vllm.transformers_utils.tokenizer import decode_tokens, encode_tokens
 from vllm.utils.collection_utils import flatten_2d_lists, full_groupby
 from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
 from vllm.utils.jsontree import JSONTree, json_map_leaves
@@ -80,9 +79,9 @@ def _cached_encode(
     tokenizer: TokenizerLike,
     text: str,
     *,
-    add_special_tokens: bool | None = None,
+    add_special_tokens: bool = True,
 ) -> list[int]:
-    return encode_tokens(tokenizer, text, add_special_tokens=add_special_tokens)
+    return tokenizer.encode(text, add_special_tokens=add_special_tokens)
 
 
 @lru_cache(maxsize=2048)
@@ -90,11 +89,9 @@ def _cached_decode(
     tokenizer: TokenizerLike,
     token_ids: tuple[int, ...],
     *,
-    skip_special_tokens: bool | None = None,
+    skip_special_tokens: bool = False,
 ) -> str:
-    return decode_tokens(
-        tokenizer, list(token_ids), skip_special_tokens=skip_special_tokens
-    )
+    return tokenizer.decode(list(token_ids), skip_special_tokens=skip_special_tokens)
 
 
 def _seq2text(
@@ -110,7 +107,7 @@ def _seq2text(
         raise ValueError("You cannot decode tokens when `skip_tokenizer_init=True`")
 
     if not use_cache:
-        return decode_tokens(tokenizer, seq)
+        return tokenizer.decode(seq)
 
     return _cached_decode(tokenizer, tuple(seq))
 
@@ -126,7 +123,7 @@ def _seq2tokens(
         raise ValueError("You cannot encode text when `skip_tokenizer_init=True`")
 
     if not use_cache:
-        return encode_tokens(tokenizer, seq, add_special_tokens=False)
+        return tokenizer.encode(seq, add_special_tokens=False)
 
     return _cached_encode(tokenizer, seq, add_special_tokens=False)
 
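The `_cached_encode`/`_cached_decode` helpers above are memoized with `functools.lru_cache`, which keys the cache on its arguments and therefore needs them to be hashable; that is why `_seq2text` converts the token-id list to a tuple before calling `_cached_decode`. A standalone sketch of the same pattern, with a stand-in `fake_decode` instead of a real tokenizer (names here are illustrative only):

from functools import lru_cache


def fake_decode(token_ids: list[int], *, skip_special_tokens: bool = False) -> str:
    # Stand-in for tokenizer.decode.
    return " ".join(f"<{t}>" for t in token_ids)


@lru_cache(maxsize=2048)
def cached_decode(token_ids: tuple[int, ...], *, skip_special_tokens: bool = False) -> str:
    # lru_cache requires hashable arguments: a tuple of ints works,
    # passing a list here would raise TypeError.
    return fake_decode(list(token_ids), skip_special_tokens=skip_special_tokens)


seq = [1, 2, 3]
print(cached_decode(tuple(seq)))   # computed on the first call
print(cached_decode(tuple(seq)))   # served from the cache
print(cached_decode.cache_info())  # CacheInfo(hits=1, misses=1, ...)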
@@ -2198,8 +2195,8 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
         tokenizer = self.info.get_tokenizer()
         decoder_prompt_raw = self.create_decoder_prompt(prompt, mm_data)
         if isinstance(decoder_prompt_raw, str):
-            decoder_prompt_ids = encode_tokens(
-                tokenizer, decoder_prompt_raw, add_special_tokens=False
+            decoder_prompt_ids = tokenizer.encode(
+                decoder_prompt_raw, add_special_tokens=False
             )
         else:
             decoder_prompt_ids = decoder_prompt_raw
@@ -4,6 +4,8 @@
 import warnings
 from typing import Any
 
+from typing_extensions import deprecated
+
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 
@@ -73,6 +75,7 @@ def __getattr__(name: str):
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
+@deprecated("Will be removed in v0.13. Please use `tokenizer.decode()` instead.")
 def decode_tokens(
     tokenizer: TokenizerLike,
     token_ids: list[int],
@@ -94,6 +97,7 @@ def decode_tokens(
     return tokenizer.decode(token_ids, **kw_args)
 
 
+@deprecated("Will be removed in v0.13. Please use `tokenizer.encode()` instead.")
 def encode_tokens(
     tokenizer: TokenizerLike,
     text: str,
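The old helpers are not deleted outright; they stay behind as shims marked with `typing_extensions.deprecated` (PEP 702), which both warns at runtime and flags call sites for type checkers. A minimal sketch of that pattern, using hypothetical `old_helper`/`new_api` names:

import warnings

from typing_extensions import deprecated


def new_api(text: str) -> str:
    return text.upper()


@deprecated("Will be removed in a future release. Please use `new_api()` instead.")
def old_helper(text: str) -> str:
    # The shim just delegates to the replacement API.
    return new_api(text)


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    print(old_helper("hello"))          # still works...
    print(caught[0].category.__name__)  # ...but emits DeprecationWarning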