mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 09:57:09 +08:00
[Chore] Move tokenizer initialization methods (#29793)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
e2fbfc955e
commit
653591d5e7
@ -40,7 +40,7 @@ from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
|
||||
try:
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
except ImportError:
|
||||
from backend_request_func import get_tokenizer
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@ from tqdm.asyncio import tqdm
|
||||
from transformers import PreTrainedTokenizerBase
|
||||
|
||||
try:
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
except ImportError:
|
||||
from backend_request_func import get_tokenizer
|
||||
|
||||
|
||||
@ -8,7 +8,7 @@ import torch
|
||||
|
||||
from vllm import LLM, SamplingParams
|
||||
from vllm.config.compilation import CompilationMode, DynamicShapesType
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.utils.torch_utils import is_torch_equal_or_newer
|
||||
|
||||
|
||||
|
||||
@ -6,7 +6,7 @@ import pytest
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
from ...models.registry import HF_EXAMPLE_MODELS
|
||||
from ...utils import VLLM_PATH
|
||||
|
||||
@ -14,7 +14,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
|
||||
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.v1.engine.async_llm import AsyncLLM
|
||||
|
||||
MODEL_NAME = "openai-community/gpt2"
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
|
||||
@ -14,7 +14,7 @@ from vllm.config.multimodal import MultiModalConfig
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
||||
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
|
||||
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.v1.engine.async_llm import AsyncLLM
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
@ -7,7 +7,7 @@ import tempfile
|
||||
import pytest
|
||||
|
||||
from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ import pytest
|
||||
import pytest_asyncio
|
||||
import requests
|
||||
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
|
||||
@ -271,7 +271,7 @@ async def test_streaming_product_tool_call():
|
||||
|
||||
@pytest.fixture
|
||||
def qwen_tokenizer() -> TokenizerLike:
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
return get_tokenizer("Qwen/Qwen3-32B")
|
||||
|
||||
|
||||
@ -18,7 +18,7 @@ from tests.utils import RemoteOpenAIServer
|
||||
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
|
||||
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.utils.serial_utils import (
|
||||
EMBED_DTYPE_TO_TORCH_DTYPE,
|
||||
ENDIANNESS,
|
||||
|
||||
@ -12,7 +12,7 @@ import torch
|
||||
from tests.models.utils import check_embeddings_close
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.utils.serial_utils import (
|
||||
EMBED_DTYPE_TO_TORCH_DTYPE,
|
||||
ENDIANNESS,
|
||||
|
||||
@ -28,8 +28,7 @@ from vllm.multimodal.utils import (
|
||||
encode_image_base64,
|
||||
encode_video_base64,
|
||||
)
|
||||
from vllm.tokenizers import MistralTokenizer
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import MistralTokenizer, get_tokenizer
|
||||
|
||||
from ..models.registry import HF_EXAMPLE_MODELS
|
||||
from ..utils import VLLM_PATH
|
||||
|
||||
@ -22,11 +22,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
|
||||
from vllm.multimodal.cache import MultiModalProcessorOnlyCache
|
||||
from vllm.multimodal.inputs import MultiModalInputs
|
||||
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
||||
from vllm.tokenizers import MistralTokenizer
|
||||
from vllm.transformers_utils.tokenizer import (
|
||||
cached_tokenizer_from_config,
|
||||
encode_tokens,
|
||||
)
|
||||
from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
|
||||
from vllm.transformers_utils.tokenizer import encode_tokens
|
||||
|
||||
from ....multimodal.utils import random_audio, random_image, random_video
|
||||
from ...registry import (
|
||||
|
||||
@ -31,7 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs
|
||||
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
||||
from vllm.multimodal.utils import group_mm_kwargs_by_modality
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.tokenizers import cached_tokenizer_from_config
|
||||
from vllm.utils.collection_utils import is_list_of
|
||||
from vllm.utils.torch_utils import set_default_torch_dtype
|
||||
|
||||
|
||||
@ -13,7 +13,7 @@ from transformers import PretrainedConfig
|
||||
from vllm.config.model import ModelConfig, ModelDType, RunnerOption
|
||||
from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
|
||||
from vllm.multimodal.processing import InputProcessingContext
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.tokenizers import cached_tokenizer_from_config
|
||||
|
||||
from .. import ci_envs
|
||||
from .registry import HF_EXAMPLE_MODELS
|
||||
|
||||
@ -7,7 +7,7 @@ from vllm.config import ModelConfig
|
||||
from vllm.inputs import zip_enc_dec_prompts
|
||||
from vllm.inputs.parse import parse_raw_prompts
|
||||
from vllm.inputs.preprocess import InputPreprocessor
|
||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
||||
from vllm.tokenizers import init_tokenizer_from_config
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
@ -108,7 +108,7 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
|
||||
)
|
||||
def test_preprocessor_always_mm_code_path(model_id, prompt):
|
||||
model_config = ModelConfig(model=model_id)
|
||||
tokenizer = init_tokenizer_from_configs(model_config)
|
||||
tokenizer = init_tokenizer_from_config(model_config)
|
||||
input_preprocessor = InputPreprocessor(model_config, tokenizer)
|
||||
|
||||
# HF processor adds sep token
|
||||
|
||||
@ -5,8 +5,7 @@ from typing import _get_protocol_attrs # type: ignore
|
||||
import pytest
|
||||
from transformers import PreTrainedTokenizerBase
|
||||
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||
|
||||
|
||||
def _get_missing_attrs(obj: object, target: type):
|
||||
|
||||
@ -2,8 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
from pathlib import Path
|
||||
|
||||
from vllm.tokenizers import TokenizerLike, TokenizerRegistry
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import TokenizerLike, TokenizerRegistry, get_tokenizer
|
||||
|
||||
|
||||
class TestTokenizer(TokenizerLike):
|
||||
|
||||
@ -6,7 +6,7 @@ import pytest
|
||||
from vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser import (
|
||||
DeepSeekV31ToolParser,
|
||||
)
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
MODEL = "deepseek-ai/DeepSeek-V3.1"
|
||||
|
||||
|
||||
@ -14,9 +14,8 @@ from vllm.entrypoints.openai.protocol import (
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser import Ernie45ToolParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
# Use a common model that is likely to be available
|
||||
MODEL = "baidu/ERNIE-4.5-21B-A3B-Thinking"
|
||||
|
||||
@ -10,7 +10,7 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
||||
from vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser import (
|
||||
Glm4MoeModelToolParser,
|
||||
)
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
@ -10,9 +10,8 @@ from partial_json_parser.core.options import Allow
|
||||
|
||||
from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
|
||||
from vllm.entrypoints.openai.tool_parsers.jamba_tool_parser import JambaToolParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
@ -8,7 +8,7 @@ import pytest
|
||||
|
||||
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
||||
from vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
@ -13,7 +13,7 @@ from vllm.entrypoints.openai.protocol import (
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.minimax_tool_parser import MinimaxToolParser
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@ from openai_harmony import (
|
||||
|
||||
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
||||
from vllm.entrypoints.openai.tool_parsers.openai_tool_parser import OpenAIToolParser
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
MODEL = "gpt2"
|
||||
|
||||
|
||||
@ -17,9 +17,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
|
||||
Qwen3CoderToolParser,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
@ -15,9 +15,8 @@ from vllm.entrypoints.openai.protocol import (
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
@ -13,9 +13,8 @@ from vllm.entrypoints.openai.protocol import (
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.tool_parsers.xlam_tool_parser import xLAMToolParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
@ -6,8 +6,8 @@ only get the `eos_token_id` from the tokenizer as defined by
|
||||
`vllm.LLMEngine._get_eos_token_id`.
|
||||
"""
|
||||
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.transformers_utils.config import try_get_generation_config
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
|
||||
def test_get_llama3_eos_token():
|
||||
|
||||
@ -44,7 +44,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.entrypoints.cli.serve import ServeSubcommand
|
||||
from vllm.model_executor.model_loader import get_model_loader
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.mem_constants import GB_bytes
|
||||
from vllm.utils.network_utils import get_open_port
|
||||
|
||||
@ -9,7 +9,7 @@ import regex as re
|
||||
from openai import BadRequestError
|
||||
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
# any model with a chat template should work here
|
||||
MODEL_NAME = "facebook/opt-125m"
|
||||
|
||||
@ -14,7 +14,7 @@ import pytest
|
||||
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from tests.conftest import VllmRunner
|
||||
|
||||
@ -47,7 +47,7 @@ from vllm.benchmarks.lib.endpoint_request_func import (
|
||||
)
|
||||
from vllm.benchmarks.lib.ready_checker import wait_for_endpoint
|
||||
from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.utils.gc_utils import freeze_gc_heap
|
||||
from vllm.utils.network_utils import join_host_port
|
||||
|
||||
|
||||
@ -444,7 +444,7 @@ def load_weights_using_from_2_way_softmax(
|
||||
)
|
||||
loaded_weights = pooling_model_cls.load_weights(model, weights, load_lm_head=True)
|
||||
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
tokenizer = get_tokenizer(
|
||||
model_config.tokenizer,
|
||||
@ -498,7 +498,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
|
||||
# Skip ModelForSequenceClassification in MRO to avoid infinite recursion
|
||||
loaded_weights = type(model).__mro__[1].load_weights(model, weights)
|
||||
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
tokenizer = get_tokenizer(
|
||||
model_config.tokenizer,
|
||||
|
||||
@ -45,6 +45,7 @@ from vllm.multimodal.processing import (
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import cached_tokenizer_from_config
|
||||
from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
|
||||
from vllm.transformers_utils.processors.deepseek_ocr import (
|
||||
BASE_SIZE,
|
||||
@ -53,7 +54,6 @@ from vllm.transformers_utils.processors.deepseek_ocr import (
|
||||
DeepseekOCRProcessor,
|
||||
count_tiles,
|
||||
)
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
from vllm.v1.sample.logits_processor import (
|
||||
AdapterLogitsProcessor,
|
||||
|
||||
@ -41,13 +41,13 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import cached_tokenizer_from_config
|
||||
from vllm.transformers_utils.configs.deepseek_vl2 import (
|
||||
DeepseekVLV2Config,
|
||||
MlpProjectorConfig,
|
||||
VisionEncoderConfig,
|
||||
)
|
||||
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
from vllm.utils.torch_utils import set_default_torch_dtype
|
||||
|
||||
|
||||
@ -59,8 +59,8 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.processor import cached_get_processor
|
||||
from vllm.transformers_utils.tokenizer import cached_get_tokenizer
|
||||
from vllm.tokenizers import cached_tokenizer_from_config
|
||||
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
from .blip2 import Blip2QFormerModel
|
||||
@ -862,7 +862,7 @@ class GraniteSpeechForConditionalGeneration(
|
||||
else:
|
||||
raise ValueError(f"Unsupported task type {task_type}")
|
||||
|
||||
tokenizer = cached_get_tokenizer(model_config.model)
|
||||
tokenizer = cached_tokenizer_from_config(model_config)
|
||||
chat = [dict(role="user", content=user_prompt)]
|
||||
prompt = tokenizer.apply_chat_template(
|
||||
chat,
|
||||
@ -886,7 +886,7 @@ class GraniteSpeechForConditionalGeneration(
|
||||
model_config: ModelConfig,
|
||||
) -> int | None:
|
||||
"""Get the number of audio tokens for an audio duration in sec."""
|
||||
processor = cached_get_processor(model_config.model)
|
||||
processor = cached_processor_from_config(model_config)
|
||||
hop_length = processor.audio_processor.melspec_kwargs["hop_length"]
|
||||
proj_win_size = processor.audio_processor.projector_window_size
|
||||
ds_rate = processor.audio_processor.projector_downsample_rate
|
||||
|
||||
@ -19,7 +19,7 @@ from vllm.model_executor.layers.pooler import (
|
||||
)
|
||||
from vllm.model_executor.models.llama import LlamaForCausalLM
|
||||
from vllm.tasks import PoolingTask
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.tokenizers import cached_tokenizer_from_config
|
||||
from vllm.v1.outputs import PoolerOutput
|
||||
from vllm.v1.pool.metadata import PoolingMetadata
|
||||
|
||||
|
||||
@ -73,12 +73,9 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
|
||||
from vllm.transformers_utils.configs.radio import RadioConfig
|
||||
from vllm.transformers_utils.tokenizer import (
|
||||
cached_tokenizer_from_config,
|
||||
encode_tokens,
|
||||
)
|
||||
from vllm.transformers_utils.tokenizer import encode_tokens
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
from .utils import _merge_multimodal_embeddings
|
||||
|
||||
@ -59,8 +59,7 @@ from vllm.multimodal.processing import (
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import MistralTokenizer
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
|
||||
|
||||
@ -51,8 +51,7 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import MistralTokenizer
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
|
||||
|
||||
from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription
|
||||
from .utils import init_vllm_registered_model, maybe_prefix
|
||||
|
||||
@ -48,7 +48,7 @@ from vllm.multimodal.processing import (
|
||||
PromptUpdate,
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.transformers_utils.processor import cached_get_processor
|
||||
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||
from vllm.utils.jsontree import json_map_leaves
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
from vllm.utils.torch_utils import set_default_torch_dtype
|
||||
@ -850,7 +850,7 @@ class WhisperForConditionalGeneration(
|
||||
def get_speech_to_text_config(
|
||||
cls, model_config: ModelConfig, task_type: str
|
||||
) -> SpeechToTextConfig:
|
||||
processor = cached_get_processor(model_config.model)
|
||||
processor = cached_processor_from_config(model_config)
|
||||
|
||||
return SpeechToTextConfig(
|
||||
max_audio_clip_s=processor.feature_extractor.chunk_length,
|
||||
@ -864,7 +864,7 @@ class WhisperForConditionalGeneration(
|
||||
stt_config: SpeechToTextConfig,
|
||||
model_config: ModelConfig,
|
||||
) -> int | None:
|
||||
processor = cached_get_processor(model_config.model)
|
||||
processor = cached_processor_from_config(model_config)
|
||||
hop_length = processor.feature_extractor.hop_length
|
||||
assert hop_length is not None
|
||||
# NOTE(NickLucche) user can't pass encoder
|
||||
|
||||
@ -6,8 +6,7 @@ from typing import TYPE_CHECKING, Generic, Protocol, TypeVar, cast
|
||||
|
||||
from vllm.config.multimodal import BaseDummyOptions
|
||||
from vllm.logger import init_logger
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
|
||||
|
||||
from .cache import BaseMultiModalProcessorCache
|
||||
from .processing import (
|
||||
|
||||
@ -4,12 +4,21 @@
|
||||
from .hf import HfTokenizer
|
||||
from .mistral import MistralTokenizer
|
||||
from .protocol import TokenizerLike
|
||||
from .registry import TokenizerRegistry, get_tokenizer
|
||||
from .registry import (
|
||||
TokenizerRegistry,
|
||||
cached_get_tokenizer,
|
||||
cached_tokenizer_from_config,
|
||||
get_tokenizer,
|
||||
init_tokenizer_from_config,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"TokenizerLike",
|
||||
"HfTokenizer",
|
||||
"MistralTokenizer",
|
||||
"TokenizerRegistry",
|
||||
"cached_get_tokenizer",
|
||||
"get_tokenizer",
|
||||
"cached_tokenizer_from_config",
|
||||
"init_tokenizer_from_config",
|
||||
]
|
||||
|
||||
@ -2,10 +2,12 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import importlib.util
|
||||
from collections.abc import Callable
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import TypeVar, overload
|
||||
from typing import TYPE_CHECKING, TypeVar, overload
|
||||
|
||||
import huggingface_hub
|
||||
from typing_extensions import assert_never
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
@ -21,6 +23,9 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||
|
||||
from .protocol import TokenizerLike
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.config import ModelConfig
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
_T = TypeVar("_T", bound=type[TokenizerLike])
|
||||
@ -195,3 +200,34 @@ def get_tokenizer(
|
||||
)
|
||||
|
||||
return tokenizer
|
||||
|
||||
|
||||
# Memoized variant of `get_tokenizer`, produced by wrapping it in
# `functools.lru_cache` with the default cache settings.
# NOTE(review): every argument passed to it must be hashable, because
# `lru_cache` builds its cache key from the call arguments.
cached_get_tokenizer = lru_cache(get_tokenizer)
|
||||
|
||||
|
||||
def cached_tokenizer_from_config(model_config: "ModelConfig", **kwargs):
    """Look up (or create) the tokenizer described by ``model_config``.

    The tokenizer name, mode, revision and ``trust_remote_code`` flag are
    read from ``model_config`` and forwarded to ``cached_get_tokenizer``.

    Args:
        model_config: Configuration object providing ``tokenizer``,
            ``tokenizer_mode``, ``tokenizer_revision`` and
            ``trust_remote_code``.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``cached_get_tokenizer``.

    Returns:
        Whatever ``cached_get_tokenizer`` returns for these arguments.
    """
    tokenizer_name = model_config.tokenizer
    mode = model_config.tokenizer_mode
    tokenizer_revision = model_config.tokenizer_revision
    allow_remote_code = model_config.trust_remote_code

    # Keep the keyword order stable: it is part of the call signature seen
    # by ``cached_get_tokenizer``.
    return cached_get_tokenizer(
        tokenizer_name,
        tokenizer_mode=mode,
        revision=tokenizer_revision,
        trust_remote_code=allow_remote_code,
        **kwargs,
    )
|
||||
|
||||
|
||||
def init_tokenizer_from_config(model_config: "ModelConfig"):
    """Create the tokenizer for ``model_config``, picking its truncation side.

    The truncation side is derived from ``model_config.runner_type``:
    ``"generate"`` and ``"draft"`` use ``"left"``, ``"pooling"`` uses
    ``"right"``. Any other value is passed to ``assert_never``.

    Args:
        model_config: Configuration object providing ``runner_type``,
            ``tokenizer``, ``tokenizer_mode``, ``trust_remote_code`` and
            ``tokenizer_revision``.

    Returns:
        The tokenizer returned by ``get_tokenizer`` (a fresh call; this
        function does not go through ``cached_get_tokenizer``).
    """
    runner_type = model_config.runner_type

    if runner_type == "pooling":
        truncation_side = "right"
    elif runner_type == "generate" or runner_type == "draft":
        truncation_side = "left"
    else:
        # Exhaustiveness guard for runner types not handled above.
        assert_never(runner_type)

    tokenizer = get_tokenizer(
        model_config.tokenizer,
        tokenizer_mode=model_config.tokenizer_mode,
        trust_remote_code=model_config.trust_remote_code,
        revision=model_config.tokenizer_revision,
        truncation_side=truncation_side,
    )
    return tokenizer
|
||||
|
||||
@ -2,17 +2,10 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import warnings
|
||||
from functools import lru_cache
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from typing_extensions import assert_never
|
||||
from typing import Any
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.config import ModelConfig
|
||||
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@ -28,18 +21,54 @@ def __getattr__(name: str):
|
||||
)
|
||||
|
||||
return TokenizerLike
|
||||
if name == "get_cached_tokenizer":
|
||||
from vllm.tokenizers.hf import get_cached_tokenizer
|
||||
if name == "get_tokenizer":
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
|
||||
warnings.warn(
|
||||
"`vllm.transformers_utils.tokenizer.get_cached_tokenizer` "
|
||||
"has been moved to `vllm.tokenizers.hf.get_cached_tokenizer`. "
|
||||
"`vllm.transformers_utils.tokenizer.get_tokenizer` "
|
||||
"has been moved to `vllm.tokenizers.get_tokenizer`. "
|
||||
"The old name will be removed in v0.13.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
return get_cached_tokenizer
|
||||
return get_tokenizer
|
||||
if name == "cached_get_tokenizer":
|
||||
from vllm.tokenizers import cached_get_tokenizer
|
||||
|
||||
warnings.warn(
|
||||
"`vllm.transformers_utils.tokenizer.cached_get_tokenizer` "
|
||||
"has been moved to `vllm.tokenizers.cached_get_tokenizer`. "
|
||||
"The old name will be removed in v0.13.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
return cached_get_tokenizer
|
||||
if name == "cached_tokenizer_from_config":
|
||||
from vllm.tokenizers import cached_tokenizer_from_config
|
||||
|
||||
warnings.warn(
|
||||
"`vllm.transformers_utils.tokenizer.cached_tokenizer_from_config` "
|
||||
"has been moved to `vllm.tokenizers.cached_tokenizer_from_config`. "
|
||||
"The old name will be removed in v0.13.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
return cached_tokenizer_from_config
|
||||
if name == "init_tokenizer_from_configs":
|
||||
from vllm.tokenizers import init_tokenizer_from_config
|
||||
|
||||
warnings.warn(
|
||||
"`vllm.transformers_utils.tokenizer.init_tokenizer_from_configs` "
|
||||
"has been moved to `vllm.tokenizers.init_tokenizer_from_config`. "
|
||||
"The old name will be removed in v0.13.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
return init_tokenizer_from_config
|
||||
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
@ -92,37 +121,3 @@ def encode_tokens(
|
||||
kw_args["add_special_tokens"] = add_special_tokens
|
||||
|
||||
return tokenizer.encode(text, **kw_args)
|
||||
|
||||
|
||||
cached_get_tokenizer = lru_cache(get_tokenizer)
|
||||
|
||||
|
||||
def cached_tokenizer_from_config(
|
||||
model_config: "ModelConfig",
|
||||
**kwargs: Any,
|
||||
):
|
||||
return cached_get_tokenizer(
|
||||
model_config.tokenizer,
|
||||
tokenizer_mode=model_config.tokenizer_mode,
|
||||
revision=model_config.tokenizer_revision,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def init_tokenizer_from_configs(model_config: "ModelConfig"):
|
||||
runner_type = model_config.runner_type
|
||||
if runner_type == "generate" or runner_type == "draft":
|
||||
truncation_side = "left"
|
||||
elif runner_type == "pooling":
|
||||
truncation_side = "right"
|
||||
else:
|
||||
assert_never(runner_type)
|
||||
|
||||
return get_tokenizer(
|
||||
model_config.tokenizer,
|
||||
tokenizer_mode=model_config.tokenizer_mode,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
revision=model_config.tokenizer_revision,
|
||||
truncation_side=truncation_side,
|
||||
)
|
||||
|
||||
@ -26,10 +26,9 @@ from vllm.plugins.io_processors import get_io_processor
|
||||
from vllm.pooling_params import PoolingParams
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.tasks import SupportedTask
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers import TokenizerLike, init_tokenizer_from_config
|
||||
from vllm.tracing import init_tracer
|
||||
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
|
||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils.async_utils import cancel_task_threadsafe
|
||||
from vllm.utils.collection_utils import as_list
|
||||
@ -112,7 +111,7 @@ class AsyncLLM(EngineClient):
|
||||
if self.model_config.skip_tokenizer_init:
|
||||
tokenizer = None
|
||||
else:
|
||||
tokenizer = init_tokenizer_from_configs(self.model_config)
|
||||
tokenizer = init_tokenizer_from_config(self.model_config)
|
||||
|
||||
self.input_processor = InputProcessor(self.vllm_config, tokenizer)
|
||||
self.io_processor = get_io_processor(
|
||||
|
||||
@ -23,9 +23,8 @@ from vllm.plugins.io_processors import get_io_processor
|
||||
from vllm.pooling_params import PoolingParams
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.tasks import SupportedTask
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tokenizers import TokenizerLike, init_tokenizer_from_config
|
||||
from vllm.tracing import init_tracer
|
||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.v1.engine import EngineCoreRequest
|
||||
from vllm.v1.engine.core_client import EngineCoreClient
|
||||
@ -87,7 +86,7 @@ class LLMEngine:
|
||||
if self.model_config.skip_tokenizer_init:
|
||||
tokenizer = None
|
||||
else:
|
||||
tokenizer = init_tokenizer_from_configs(self.model_config)
|
||||
tokenizer = init_tokenizer_from_config(self.model_config)
|
||||
|
||||
self.input_processor = InputProcessor(self.vllm_config, tokenizer)
|
||||
self.io_processor = get_io_processor(
|
||||
|
||||
@ -7,7 +7,7 @@ from typing import TYPE_CHECKING
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.logger import init_logger
|
||||
from vllm.reasoning import ReasoningParserManager
|
||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
||||
from vllm.tokenizers import init_tokenizer_from_config
|
||||
from vllm.utils.import_utils import LazyLoader
|
||||
from vllm.v1.structured_output.backend_guidance import GuidanceBackend
|
||||
from vllm.v1.structured_output.backend_types import (
|
||||
@ -61,7 +61,7 @@ class StructuredOutputManager:
|
||||
# of CPUs.
|
||||
max_workers = max(1, (multiprocessing.cpu_count() + 1) // 2)
|
||||
self.executor = ThreadPoolExecutor(max_workers=max_workers)
|
||||
self.tokenizer = init_tokenizer_from_configs(
|
||||
self.tokenizer = init_tokenizer_from_config(
|
||||
model_config=self.vllm_config.model_config
|
||||
)
|
||||
reasoning_parser = (
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user