[Chore] Move tokenizer initialization methods (#29793)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Cyrus Leung committed via GitHub on 2025-12-02 13:33:37 +08:00
parent e2fbfc955e
commit 653591d5e7
51 changed files with 150 additions and 129 deletions

View File

@@ -40,7 +40,7 @@ from vllm.engine.arg_utils import EngineArgs
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 try:
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
 except ImportError:
     from backend_request_func import get_tokenizer

View File

@@ -46,7 +46,7 @@ from tqdm.asyncio import tqdm
 from transformers import PreTrainedTokenizerBase
 try:
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
 except ImportError:
     from backend_request_func import get_tokenizer

View File

@@ -8,7 +8,7 @@ import torch
 from vllm import LLM, SamplingParams
 from vllm.config.compilation import CompilationMode, DynamicShapesType
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.torch_utils import is_torch_equal_or_newer

View File

@@ -6,7 +6,7 @@ import pytest
 from vllm.config import ModelConfig
 from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from ...models.registry import HF_EXAMPLE_MODELS
 from ...utils import VLLM_PATH

View File

@@ -14,7 +14,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
 from vllm.lora.request import LoRARequest
 from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.v1.engine.async_llm import AsyncLLM
 MODEL_NAME = "openai-community/gpt2"

View File

@@ -3,7 +3,7 @@
 import pytest
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from ...utils import RemoteOpenAIServer

View File

@@ -7,7 +7,7 @@
 import pytest
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from ...utils import RemoteOpenAIServer

View File

@@ -14,7 +14,7 @@ from vllm.config.multimodal import MultiModalConfig
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.v1.engine.async_llm import AsyncLLM
 from ...utils import RemoteOpenAIServer

View File

@@ -7,7 +7,7 @@ import tempfile
 import pytest
 from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from ...utils import RemoteOpenAIServer

View File

@@ -5,7 +5,7 @@ import pytest
 import pytest_asyncio
 import requests
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from ...utils import RemoteOpenAIServer

View File

@@ -271,7 +271,7 @@ async def test_streaming_product_tool_call():
 @pytest.fixture
 def qwen_tokenizer() -> TokenizerLike:
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
     return get_tokenizer("Qwen/Qwen3-32B")

View File

@@ -18,7 +18,7 @@ from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
 from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
 from vllm.platforms import current_platform
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.serial_utils import (
     EMBED_DTYPE_TO_TORCH_DTYPE,
     ENDIANNESS,

View File

@@ -12,7 +12,7 @@ import torch
 from tests.models.utils import check_embeddings_close
 from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.serial_utils import (
     EMBED_DTYPE_TO_TORCH_DTYPE,
     ENDIANNESS,

View File

@@ -28,8 +28,7 @@ from vllm.multimodal.utils import (
     encode_image_base64,
     encode_video_base64,
 )
-from vllm.tokenizers import MistralTokenizer
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import MistralTokenizer, get_tokenizer
 from ..models.registry import HF_EXAMPLE_MODELS
 from ..utils import VLLM_PATH

View File

@@ -22,11 +22,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
 from vllm.multimodal.cache import MultiModalProcessorOnlyCache
 from vllm.multimodal.inputs import MultiModalInputs
 from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
-from vllm.tokenizers import MistralTokenizer
-from vllm.transformers_utils.tokenizer import (
-    cached_tokenizer_from_config,
-    encode_tokens,
-)
+from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
+from vllm.transformers_utils.tokenizer import encode_tokens
 from ....multimodal.utils import random_audio, random_image, random_video
 from ...registry import (

View File

@@ -31,7 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs
 from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
 from vllm.multimodal.utils import group_mm_kwargs_by_modality
 from vllm.platforms import current_platform
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.utils.collection_utils import is_list_of
 from vllm.utils.torch_utils import set_default_torch_dtype

View File

@@ -13,7 +13,7 @@ from transformers import PretrainedConfig
 from vllm.config.model import ModelConfig, ModelDType, RunnerOption
 from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
 from vllm.multimodal.processing import InputProcessingContext
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import cached_tokenizer_from_config
 from .. import ci_envs
 from .registry import HF_EXAMPLE_MODELS

View File

@@ -7,7 +7,7 @@ from vllm.config import ModelConfig
 from vllm.inputs import zip_enc_dec_prompts
 from vllm.inputs.parse import parse_raw_prompts
 from vllm.inputs.preprocess import InputPreprocessor
-from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
+from vllm.tokenizers import init_tokenizer_from_config
 pytestmark = pytest.mark.cpu_test
@@ -108,7 +108,7 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
 )
 def test_preprocessor_always_mm_code_path(model_id, prompt):
     model_config = ModelConfig(model=model_id)
-    tokenizer = init_tokenizer_from_configs(model_config)
+    tokenizer = init_tokenizer_from_config(model_config)
     input_preprocessor = InputPreprocessor(model_config, tokenizer)
     # HF processor adds sep token

View File

@@ -5,8 +5,7 @@ from typing import _get_protocol_attrs # type: ignore
 import pytest
 from transformers import PreTrainedTokenizerBase
-from vllm.tokenizers import TokenizerLike
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import TokenizerLike, get_tokenizer
 def _get_missing_attrs(obj: object, target: type):

View File

@@ -2,8 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from pathlib import Path
-from vllm.tokenizers import TokenizerLike, TokenizerRegistry
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import TokenizerLike, TokenizerRegistry, get_tokenizer
 class TestTokenizer(TokenizerLike):

View File

@@ -6,7 +6,7 @@ import pytest
 from vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser import (
     DeepSeekV31ToolParser,
 )
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 MODEL = "deepseek-ai/DeepSeek-V3.1"

View File

@@ -14,9 +14,8 @@ from vllm.entrypoints.openai.protocol import (
     ToolCall,
 )
 from vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser import Ernie45ToolParser
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
-from vllm.transformers_utils.tokenizer import get_tokenizer
 # Use a common model that is likely to be available
 MODEL = "baidu/ERNIE-4.5-21B-A3B-Thinking"

View File

@@ -10,7 +10,7 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
 from vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser import (
     Glm4MoeModelToolParser,
 )
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 pytestmark = pytest.mark.cpu_test

View File

@@ -10,9 +10,8 @@ from partial_json_parser.core.options import Allow
 from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
 from vllm.entrypoints.openai.tool_parsers.jamba_tool_parser import JambaToolParser
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
-from vllm.transformers_utils.tokenizer import get_tokenizer
 pytestmark = pytest.mark.cpu_test

View File

@@ -8,7 +8,7 @@ import pytest
 from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
 from vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 pytestmark = pytest.mark.cpu_test

View File

@@ -13,7 +13,7 @@ from vllm.entrypoints.openai.protocol import (
     ToolCall,
 )
 from vllm.entrypoints.openai.tool_parsers.minimax_tool_parser import MinimaxToolParser
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 pytestmark = pytest.mark.cpu_test

View File

@@ -16,7 +16,7 @@ from openai_harmony import (
 from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
 from vllm.entrypoints.openai.tool_parsers.openai_tool_parser import OpenAIToolParser
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 MODEL = "gpt2"

View File

@@ -17,9 +17,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
     Qwen3CoderToolParser,
 )
 from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
-from vllm.transformers_utils.tokenizer import get_tokenizer
 pytestmark = pytest.mark.cpu_test

View File

@@ -15,9 +15,8 @@ from vllm.entrypoints.openai.protocol import (
     ToolCall,
 )
 from vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
-from vllm.transformers_utils.tokenizer import get_tokenizer
 pytestmark = pytest.mark.cpu_test

View File

@@ -13,9 +13,8 @@ from vllm.entrypoints.openai.protocol import (
     ToolCall,
 )
 from vllm.entrypoints.openai.tool_parsers.xlam_tool_parser import xLAMToolParser
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
-from vllm.transformers_utils.tokenizer import get_tokenizer
 pytestmark = pytest.mark.cpu_test

View File

@@ -6,8 +6,8 @@ only get the `eos_token_id` from the tokenizer as defined by
 `vllm.LLMEngine._get_eos_token_id`.
 """
+from vllm.tokenizers import get_tokenizer
 from vllm.transformers_utils.config import try_get_generation_config
-from vllm.transformers_utils.tokenizer import get_tokenizer
 def test_get_llama3_eos_token():

View File

@@ -44,7 +44,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.cli.serve import ServeSubcommand
 from vllm.model_executor.model_loader import get_model_loader
 from vllm.platforms import current_platform
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.utils.mem_constants import GB_bytes
 from vllm.utils.network_utils import get_open_port

View File

@@ -9,7 +9,7 @@ import regex as re
 from openai import BadRequestError
 from tests.utils import RemoteOpenAIServer
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 # any model with a chat template should work here
 MODEL_NAME = "facebook/opt-125m"

View File

@@ -14,7 +14,7 @@ import pytest
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 if TYPE_CHECKING:
     from tests.conftest import VllmRunner

View File

@@ -47,7 +47,7 @@ from vllm.benchmarks.lib.endpoint_request_func import (
 )
 from vllm.benchmarks.lib.ready_checker import wait_for_endpoint
 from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.gc_utils import freeze_gc_heap
 from vllm.utils.network_utils import join_host_port

View File

@@ -444,7 +444,7 @@ def load_weights_using_from_2_way_softmax(
     )
     loaded_weights = pooling_model_cls.load_weights(model, weights, load_lm_head=True)
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
     tokenizer = get_tokenizer(
         model_config.tokenizer,
@@ -498,7 +498,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
     # Skip ModelForSequenceClassification in MRO to avoid infinite recursion
     loaded_weights = type(model).__mro__[1].load_weights(model, weights)
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
     tokenizer = get_tokenizer(
         model_config.tokenizer,

View File

@@ -45,6 +45,7 @@ from vllm.multimodal.processing import (
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
 from vllm.transformers_utils.processors.deepseek_ocr import (
     BASE_SIZE,
@@ -53,7 +54,6 @@ from vllm.transformers_utils.processors.deepseek_ocr import (
     DeepseekOCRProcessor,
     count_tiles,
 )
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.v1.sample.logits_processor import (
     AdapterLogitsProcessor,

View File

@@ -41,13 +41,13 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.transformers_utils.configs.deepseek_vl2 import (
     DeepseekVLV2Config,
     MlpProjectorConfig,
     VisionEncoderConfig,
 )
 from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.utils.torch_utils import set_default_torch_dtype

View File

@@ -59,8 +59,8 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.processor import cached_get_processor
-from vllm.transformers_utils.tokenizer import cached_get_tokenizer
+from vllm.tokenizers import cached_tokenizer_from_config
+from vllm.transformers_utils.processor import cached_processor_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from .blip2 import Blip2QFormerModel
@@ -862,7 +862,7 @@ class GraniteSpeechForConditionalGeneration(
         else:
             raise ValueError(f"Unsupported task type {task_type}")
-        tokenizer = cached_get_tokenizer(model_config.model)
+        tokenizer = cached_tokenizer_from_config(model_config)
         chat = [dict(role="user", content=user_prompt)]
         prompt = tokenizer.apply_chat_template(
             chat,
@@ -886,7 +886,7 @@ class GraniteSpeechForConditionalGeneration(
         model_config: ModelConfig,
     ) -> int | None:
         """Get the number of audio tokens for an audio duration in sec."""
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)
         hop_length = processor.audio_processor.melspec_kwargs["hop_length"]
         proj_win_size = processor.audio_processor.projector_window_size
         ds_rate = processor.audio_processor.projector_downsample_rate

View File

@@ -19,7 +19,7 @@ from vllm.model_executor.layers.pooler import (
 )
 from vllm.model_executor.models.llama import LlamaForCausalLM
 from vllm.tasks import PoolingTask
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.v1.outputs import PoolerOutput
 from vllm.v1.pool.metadata import PoolingMetadata

View File

@@ -73,12 +73,9 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from vllm.transformers_utils.configs.radio import RadioConfig
-from vllm.transformers_utils.tokenizer import (
-    cached_tokenizer_from_config,
-    encode_tokens,
-)
+from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from .utils import _merge_multimodal_embeddings

View File

@@ -59,8 +59,7 @@ from vllm.multimodal.processing import (
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import MistralTokenizer
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP

View File

@@ -51,8 +51,7 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import MistralTokenizer
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
 from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription
 from .utils import init_vllm_registered_model, maybe_prefix

View File

@@ -48,7 +48,7 @@ from vllm.multimodal.processing import (
     PromptUpdate,
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
-from vllm.transformers_utils.processor import cached_get_processor
+from vllm.transformers_utils.processor import cached_processor_from_config
 from vllm.utils.jsontree import json_map_leaves
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.utils.torch_utils import set_default_torch_dtype
@@ -850,7 +850,7 @@ class WhisperForConditionalGeneration(
     def get_speech_to_text_config(
        cls, model_config: ModelConfig, task_type: str
     ) -> SpeechToTextConfig:
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)
         return SpeechToTextConfig(
             max_audio_clip_s=processor.feature_extractor.chunk_length,
@@ -864,7 +864,7 @@ class WhisperForConditionalGeneration(
         stt_config: SpeechToTextConfig,
         model_config: ModelConfig,
     ) -> int | None:
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)
         hop_length = processor.feature_extractor.hop_length
         assert hop_length is not None
         # NOTE(NickLucche) user can't pass encoder

View File

@@ -6,8 +6,7 @@ from typing import TYPE_CHECKING, Generic, Protocol, TypeVar, cast
 from vllm.config.multimodal import BaseDummyOptions
 from vllm.logger import init_logger
-from vllm.tokenizers import TokenizerLike
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from .cache import BaseMultiModalProcessorCache
 from .processing import (

View File

@@ -4,12 +4,21 @@
 from .hf import HfTokenizer
 from .mistral import MistralTokenizer
 from .protocol import TokenizerLike
-from .registry import TokenizerRegistry, get_tokenizer
+from .registry import (
+    TokenizerRegistry,
+    cached_get_tokenizer,
+    cached_tokenizer_from_config,
+    get_tokenizer,
+    init_tokenizer_from_config,
+)
 __all__ = [
     "TokenizerLike",
     "HfTokenizer",
     "MistralTokenizer",
     "TokenizerRegistry",
+    "cached_get_tokenizer",
     "get_tokenizer",
+    "cached_tokenizer_from_config",
+    "init_tokenizer_from_config",
 ]
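
With these re-exports, `vllm.tokenizers` becomes the single import surface for tokenizer construction. A minimal usage sketch of the re-exported helper (the model id below is illustrative only):

from vllm.tokenizers import get_tokenizer

# Build a tokenizer through vLLM's registry; any HF model id works here.
tokenizer = get_tokenizer("openai-community/gpt2")
print(tokenizer.encode("Hello, world!"))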

View File

@@ -2,10 +2,12 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import importlib.util
 from collections.abc import Callable
+from functools import lru_cache
 from pathlib import Path
-from typing import TypeVar, overload
+from typing import TYPE_CHECKING, TypeVar, overload
 import huggingface_hub
+from typing_extensions import assert_never
 import vllm.envs as envs
 from vllm.logger import init_logger
@@ -21,6 +23,9 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
 from .protocol import TokenizerLike
+if TYPE_CHECKING:
+    from vllm.config import ModelConfig
 logger = init_logger(__name__)
 _T = TypeVar("_T", bound=type[TokenizerLike])
@@ -195,3 +200,34 @@ def get_tokenizer(
     )
     return tokenizer
+
+
+cached_get_tokenizer = lru_cache(get_tokenizer)
+
+
+def cached_tokenizer_from_config(model_config: "ModelConfig", **kwargs):
+    return cached_get_tokenizer(
+        model_config.tokenizer,
+        tokenizer_mode=model_config.tokenizer_mode,
+        revision=model_config.tokenizer_revision,
+        trust_remote_code=model_config.trust_remote_code,
+        **kwargs,
+    )
+
+
+def init_tokenizer_from_config(model_config: "ModelConfig"):
+    runner_type = model_config.runner_type
+    if runner_type == "generate" or runner_type == "draft":
+        truncation_side = "left"
+    elif runner_type == "pooling":
+        truncation_side = "right"
+    else:
+        assert_never(runner_type)
+
+    return get_tokenizer(
+        model_config.tokenizer,
+        tokenizer_mode=model_config.tokenizer_mode,
+        trust_remote_code=model_config.trust_remote_code,
+        revision=model_config.tokenizer_revision,
+        truncation_side=truncation_side,
+    )
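
The two new helpers above differ in caching and in how `truncation_side` is derived from the runner type. A minimal sketch of calling them directly, assuming a `ModelConfig` can be constructed locally as in the tests earlier in this diff (the model id is illustrative):

from vllm.config import ModelConfig
from vllm.tokenizers import cached_tokenizer_from_config, init_tokenizer_from_config

model_config = ModelConfig(model="openai-community/gpt2")

# Fresh instance; truncation_side becomes "left" for a generate-runner model.
tokenizer = init_tokenizer_from_config(model_config)

# lru_cache-backed: identical arguments reuse one tokenizer instance.
cached = cached_tokenizer_from_config(model_config)
assert cached is cached_tokenizer_from_config(model_config)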

View File

@@ -2,17 +2,10 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import warnings
-from functools import lru_cache
-from typing import TYPE_CHECKING, Any
-from typing_extensions import assert_never
+from typing import Any
 from vllm.logger import init_logger
-from vllm.tokenizers import TokenizerLike, get_tokenizer
-if TYPE_CHECKING:
-    from vllm.config import ModelConfig
+from vllm.tokenizers import TokenizerLike
 logger = init_logger(__name__)
@@ -28,18 +21,54 @@ def __getattr__(name: str):
         )
         return TokenizerLike
-    if name == "get_cached_tokenizer":
-        from vllm.tokenizers.hf import get_cached_tokenizer
+    if name == "get_tokenizer":
+        from vllm.tokenizers import get_tokenizer
         warnings.warn(
-            "`vllm.transformers_utils.tokenizer.get_cached_tokenizer` "
-            "has been moved to `vllm.tokenizers.hf.get_cached_tokenizer`. "
+            "`vllm.transformers_utils.tokenizer.get_tokenizer` "
+            "has been moved to `vllm.tokenizers.get_tokenizer`. "
             "The old name will be removed in v0.13.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return get_cached_tokenizer
+        return get_tokenizer
+
+    if name == "cached_get_tokenizer":
+        from vllm.tokenizers import cached_get_tokenizer
+
+        warnings.warn(
+            "`vllm.transformers_utils.tokenizer.cached_get_tokenizer` "
+            "has been moved to `vllm.tokenizers.cached_get_tokenizer`. "
+            "The old name will be removed in v0.13.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return cached_get_tokenizer
+
+    if name == "cached_tokenizer_from_config":
+        from vllm.tokenizers import cached_tokenizer_from_config
+
+        warnings.warn(
+            "`vllm.transformers_utils.tokenizer.cached_tokenizer_from_config` "
+            "has been moved to `vllm.tokenizers.cached_tokenizer_from_config`. "
+            "The old name will be removed in v0.13.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return cached_tokenizer_from_config
+
+    if name == "init_tokenizer_from_configs":
+        from vllm.tokenizers import init_tokenizer_from_config
+
+        warnings.warn(
+            "`vllm.transformers_utils.tokenizer.init_tokenizer_from_configs` "
+            "has been moved to `vllm.tokenizers.init_tokenizer_from_config`. "
+            "The old name will be removed in v0.13.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return init_tokenizer_from_config
+
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -92,37 +121,3 @@ def encode_tokens(
     kw_args["add_special_tokens"] = add_special_tokens
     return tokenizer.encode(text, **kw_args)
-
-
-cached_get_tokenizer = lru_cache(get_tokenizer)
-
-
-def cached_tokenizer_from_config(
-    model_config: "ModelConfig",
-    **kwargs: Any,
-):
-    return cached_get_tokenizer(
-        model_config.tokenizer,
-        tokenizer_mode=model_config.tokenizer_mode,
-        revision=model_config.tokenizer_revision,
-        trust_remote_code=model_config.trust_remote_code,
-        **kwargs,
-    )
-
-
-def init_tokenizer_from_configs(model_config: "ModelConfig"):
-    runner_type = model_config.runner_type
-    if runner_type == "generate" or runner_type == "draft":
-        truncation_side = "left"
-    elif runner_type == "pooling":
-        truncation_side = "right"
-    else:
-        assert_never(runner_type)
-
-    return get_tokenizer(
-        model_config.tokenizer,
-        tokenizer_mode=model_config.tokenizer_mode,
-        trust_remote_code=model_config.trust_remote_code,
-        revision=model_config.tokenizer_revision,
-        truncation_side=truncation_side,
-    )
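
The module-level `__getattr__` shim above keeps the old import path working until v0.13. A sketch of the observable behavior, assuming warnings are not filtered elsewhere in the process:

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # The old path still resolves through the shim...
    from vllm.transformers_utils.tokenizer import get_tokenizer

# ...but emits a DeprecationWarning pointing at the new home.
assert any(issubclass(w.category, DeprecationWarning) for w in caught)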

View File

@@ -26,10 +26,9 @@ from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 from vllm.tasks import SupportedTask
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, init_tokenizer_from_config
 from vllm.tracing import init_tracer
 from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
-from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils.async_utils import cancel_task_threadsafe
 from vllm.utils.collection_utils import as_list
@@ -112,7 +111,7 @@ class AsyncLLM(EngineClient):
         if self.model_config.skip_tokenizer_init:
             tokenizer = None
         else:
-            tokenizer = init_tokenizer_from_configs(self.model_config)
+            tokenizer = init_tokenizer_from_config(self.model_config)
         self.input_processor = InputProcessor(self.vllm_config, tokenizer)
         self.io_processor = get_io_processor(

View File

@@ -23,9 +23,8 @@ from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 from vllm.tasks import SupportedTask
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, init_tokenizer_from_config
 from vllm.tracing import init_tracer
-from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
 from vllm.usage.usage_lib import UsageContext
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core_client import EngineCoreClient
@@ -87,7 +86,7 @@ class LLMEngine:
         if self.model_config.skip_tokenizer_init:
             tokenizer = None
         else:
-            tokenizer = init_tokenizer_from_configs(self.model_config)
+            tokenizer = init_tokenizer_from_config(self.model_config)
         self.input_processor = InputProcessor(self.vllm_config, tokenizer)
         self.io_processor = get_io_processor(

View File

@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.reasoning import ReasoningParserManager
-from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
+from vllm.tokenizers import init_tokenizer_from_config
 from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_guidance import GuidanceBackend
 from vllm.v1.structured_output.backend_types import (
@@ -61,7 +61,7 @@ class StructuredOutputManager:
         # of CPUs.
         max_workers = max(1, (multiprocessing.cpu_count() + 1) // 2)
         self.executor = ThreadPoolExecutor(max_workers=max_workers)
-        self.tokenizer = init_tokenizer_from_configs(
+        self.tokenizer = init_tokenizer_from_config(
             model_config=self.vllm_config.model_config
         )
         reasoning_parser = (