mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 11:37:12 +08:00
[Chore] Move tokenizer initialization methods (#29793)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
e2fbfc955e
commit
653591d5e7
@ -40,7 +40,7 @@ from vllm.engine.arg_utils import EngineArgs
|
|||||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from backend_request_func import get_tokenizer
|
from backend_request_func import get_tokenizer
|
||||||
|
|
||||||
|
|||||||
@ -46,7 +46,7 @@ from tqdm.asyncio import tqdm
|
|||||||
from transformers import PreTrainedTokenizerBase
|
from transformers import PreTrainedTokenizerBase
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from backend_request_func import get_tokenizer
|
from backend_request_func import get_tokenizer
|
||||||
|
|
||||||
|
|||||||
@ -8,7 +8,7 @@ import torch
|
|||||||
|
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
from vllm.config.compilation import CompilationMode, DynamicShapesType
|
from vllm.config.compilation import CompilationMode, DynamicShapesType
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.utils.torch_utils import is_torch_equal_or_newer
|
from vllm.utils.torch_utils import is_torch_equal_or_newer
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import pytest
|
|||||||
from vllm.config import ModelConfig
|
from vllm.config import ModelConfig
|
||||||
from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template
|
from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template
|
||||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
from ...models.registry import HF_EXAMPLE_MODELS
|
from ...models.registry import HF_EXAMPLE_MODELS
|
||||||
from ...utils import VLLM_PATH
|
from ...utils import VLLM_PATH
|
||||||
|
|||||||
@ -14,7 +14,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
|
|||||||
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
|
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
|
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.v1.engine.async_llm import AsyncLLM
|
from vllm.v1.engine.async_llm import AsyncLLM
|
||||||
|
|
||||||
MODEL_NAME = "openai-community/gpt2"
|
MODEL_NAME = "openai-community/gpt2"
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from ...utils import RemoteOpenAIServer
|
||||||
|
|
||||||
|
|||||||
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from ...utils import RemoteOpenAIServer
|
||||||
|
|
||||||
|
|||||||
@ -14,7 +14,7 @@ from vllm.config.multimodal import MultiModalConfig
|
|||||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
||||||
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
|
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
|
||||||
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
|
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.v1.engine.async_llm import AsyncLLM
|
from vllm.v1.engine.async_llm import AsyncLLM
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from ...utils import RemoteOpenAIServer
|
||||||
|
|||||||
@ -7,7 +7,7 @@ import tempfile
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
|
from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from ...utils import RemoteOpenAIServer
|
||||||
|
|
||||||
|
|||||||
@ -5,7 +5,7 @@ import pytest
|
|||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from ...utils import RemoteOpenAIServer
|
||||||
|
|
||||||
|
|||||||
@ -271,7 +271,7 @@ async def test_streaming_product_tool_call():
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def qwen_tokenizer() -> TokenizerLike:
|
def qwen_tokenizer() -> TokenizerLike:
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
return get_tokenizer("Qwen/Qwen3-32B")
|
return get_tokenizer("Qwen/Qwen3-32B")
|
||||||
|
|
||||||
|
|||||||
@ -18,7 +18,7 @@ from tests.utils import RemoteOpenAIServer
|
|||||||
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
|
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
|
||||||
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
|
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.utils.serial_utils import (
|
from vllm.utils.serial_utils import (
|
||||||
EMBED_DTYPE_TO_TORCH_DTYPE,
|
EMBED_DTYPE_TO_TORCH_DTYPE,
|
||||||
ENDIANNESS,
|
ENDIANNESS,
|
||||||
|
|||||||
@ -12,7 +12,7 @@ import torch
|
|||||||
from tests.models.utils import check_embeddings_close
|
from tests.models.utils import check_embeddings_close
|
||||||
from tests.utils import RemoteOpenAIServer
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
|
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.utils.serial_utils import (
|
from vllm.utils.serial_utils import (
|
||||||
EMBED_DTYPE_TO_TORCH_DTYPE,
|
EMBED_DTYPE_TO_TORCH_DTYPE,
|
||||||
ENDIANNESS,
|
ENDIANNESS,
|
||||||
|
|||||||
@ -28,8 +28,7 @@ from vllm.multimodal.utils import (
|
|||||||
encode_image_base64,
|
encode_image_base64,
|
||||||
encode_video_base64,
|
encode_video_base64,
|
||||||
)
|
)
|
||||||
from vllm.tokenizers import MistralTokenizer
|
from vllm.tokenizers import MistralTokenizer, get_tokenizer
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
from ..models.registry import HF_EXAMPLE_MODELS
|
from ..models.registry import HF_EXAMPLE_MODELS
|
||||||
from ..utils import VLLM_PATH
|
from ..utils import VLLM_PATH
|
||||||
|
|||||||
@ -22,11 +22,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
|
|||||||
from vllm.multimodal.cache import MultiModalProcessorOnlyCache
|
from vllm.multimodal.cache import MultiModalProcessorOnlyCache
|
||||||
from vllm.multimodal.inputs import MultiModalInputs
|
from vllm.multimodal.inputs import MultiModalInputs
|
||||||
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
||||||
from vllm.tokenizers import MistralTokenizer
|
from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.tokenizer import (
|
from vllm.transformers_utils.tokenizer import encode_tokens
|
||||||
cached_tokenizer_from_config,
|
|
||||||
encode_tokens,
|
|
||||||
)
|
|
||||||
|
|
||||||
from ....multimodal.utils import random_audio, random_image, random_video
|
from ....multimodal.utils import random_audio, random_image, random_video
|
||||||
from ...registry import (
|
from ...registry import (
|
||||||
|
|||||||
@ -31,7 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs
|
|||||||
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
||||||
from vllm.multimodal.utils import group_mm_kwargs_by_modality
|
from vllm.multimodal.utils import group_mm_kwargs_by_modality
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
from vllm.tokenizers import cached_tokenizer_from_config
|
||||||
from vllm.utils.collection_utils import is_list_of
|
from vllm.utils.collection_utils import is_list_of
|
||||||
from vllm.utils.torch_utils import set_default_torch_dtype
|
from vllm.utils.torch_utils import set_default_torch_dtype
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,7 @@ from transformers import PretrainedConfig
|
|||||||
from vllm.config.model import ModelConfig, ModelDType, RunnerOption
|
from vllm.config.model import ModelConfig, ModelDType, RunnerOption
|
||||||
from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
|
from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
|
||||||
from vllm.multimodal.processing import InputProcessingContext
|
from vllm.multimodal.processing import InputProcessingContext
|
||||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
from vllm.tokenizers import cached_tokenizer_from_config
|
||||||
|
|
||||||
from .. import ci_envs
|
from .. import ci_envs
|
||||||
from .registry import HF_EXAMPLE_MODELS
|
from .registry import HF_EXAMPLE_MODELS
|
||||||
|
|||||||
@ -7,7 +7,7 @@ from vllm.config import ModelConfig
|
|||||||
from vllm.inputs import zip_enc_dec_prompts
|
from vllm.inputs import zip_enc_dec_prompts
|
||||||
from vllm.inputs.parse import parse_raw_prompts
|
from vllm.inputs.parse import parse_raw_prompts
|
||||||
from vllm.inputs.preprocess import InputPreprocessor
|
from vllm.inputs.preprocess import InputPreprocessor
|
||||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
from vllm.tokenizers import init_tokenizer_from_config
|
||||||
|
|
||||||
pytestmark = pytest.mark.cpu_test
|
pytestmark = pytest.mark.cpu_test
|
||||||
|
|
||||||
@ -108,7 +108,7 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
|
|||||||
)
|
)
|
||||||
def test_preprocessor_always_mm_code_path(model_id, prompt):
|
def test_preprocessor_always_mm_code_path(model_id, prompt):
|
||||||
model_config = ModelConfig(model=model_id)
|
model_config = ModelConfig(model=model_id)
|
||||||
tokenizer = init_tokenizer_from_configs(model_config)
|
tokenizer = init_tokenizer_from_config(model_config)
|
||||||
input_preprocessor = InputPreprocessor(model_config, tokenizer)
|
input_preprocessor = InputPreprocessor(model_config, tokenizer)
|
||||||
|
|
||||||
# HF processor adds sep token
|
# HF processor adds sep token
|
||||||
|
|||||||
@ -5,8 +5,7 @@ from typing import _get_protocol_attrs # type: ignore
|
|||||||
import pytest
|
import pytest
|
||||||
from transformers import PreTrainedTokenizerBase
|
from transformers import PreTrainedTokenizerBase
|
||||||
|
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
|
|
||||||
def _get_missing_attrs(obj: object, target: type):
|
def _get_missing_attrs(obj: object, target: type):
|
||||||
|
|||||||
@ -2,8 +2,7 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from vllm.tokenizers import TokenizerLike, TokenizerRegistry
|
from vllm.tokenizers import TokenizerLike, TokenizerRegistry, get_tokenizer
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
|
|
||||||
class TestTokenizer(TokenizerLike):
|
class TestTokenizer(TokenizerLike):
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import pytest
|
|||||||
from vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser import (
|
from vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser import (
|
||||||
DeepSeekV31ToolParser,
|
DeepSeekV31ToolParser,
|
||||||
)
|
)
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
MODEL = "deepseek-ai/DeepSeek-V3.1"
|
MODEL = "deepseek-ai/DeepSeek-V3.1"
|
||||||
|
|
||||||
|
|||||||
@ -14,9 +14,8 @@ from vllm.entrypoints.openai.protocol import (
|
|||||||
ToolCall,
|
ToolCall,
|
||||||
)
|
)
|
||||||
from vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser import Ernie45ToolParser
|
from vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser import Ernie45ToolParser
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
# Use a common model that is likely to be available
|
# Use a common model that is likely to be available
|
||||||
MODEL = "baidu/ERNIE-4.5-21B-A3B-Thinking"
|
MODEL = "baidu/ERNIE-4.5-21B-A3B-Thinking"
|
||||||
|
|||||||
@ -10,7 +10,7 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
|||||||
from vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser import (
|
from vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser import (
|
||||||
Glm4MoeModelToolParser,
|
Glm4MoeModelToolParser,
|
||||||
)
|
)
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
pytestmark = pytest.mark.cpu_test
|
pytestmark = pytest.mark.cpu_test
|
||||||
|
|
||||||
|
|||||||
@ -10,9 +10,8 @@ from partial_json_parser.core.options import Allow
|
|||||||
|
|
||||||
from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
|
from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
|
||||||
from vllm.entrypoints.openai.tool_parsers.jamba_tool_parser import JambaToolParser
|
from vllm.entrypoints.openai.tool_parsers.jamba_tool_parser import JambaToolParser
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
pytestmark = pytest.mark.cpu_test
|
pytestmark = pytest.mark.cpu_test
|
||||||
|
|
||||||
|
|||||||
@ -8,7 +8,7 @@ import pytest
|
|||||||
|
|
||||||
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
||||||
from vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
|
from vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
pytestmark = pytest.mark.cpu_test
|
pytestmark = pytest.mark.cpu_test
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,7 @@ from vllm.entrypoints.openai.protocol import (
|
|||||||
ToolCall,
|
ToolCall,
|
||||||
)
|
)
|
||||||
from vllm.entrypoints.openai.tool_parsers.minimax_tool_parser import MinimaxToolParser
|
from vllm.entrypoints.openai.tool_parsers.minimax_tool_parser import MinimaxToolParser
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
pytestmark = pytest.mark.cpu_test
|
pytestmark = pytest.mark.cpu_test
|
||||||
|
|
||||||
|
|||||||
@ -16,7 +16,7 @@ from openai_harmony import (
|
|||||||
|
|
||||||
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
|
||||||
from vllm.entrypoints.openai.tool_parsers.openai_tool_parser import OpenAIToolParser
|
from vllm.entrypoints.openai.tool_parsers.openai_tool_parser import OpenAIToolParser
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
MODEL = "gpt2"
|
MODEL = "gpt2"
|
||||||
|
|
||||||
|
|||||||
@ -17,9 +17,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
|
|||||||
Qwen3CoderToolParser,
|
Qwen3CoderToolParser,
|
||||||
)
|
)
|
||||||
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
|
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
pytestmark = pytest.mark.cpu_test
|
pytestmark = pytest.mark.cpu_test
|
||||||
|
|
||||||
|
|||||||
@ -15,9 +15,8 @@ from vllm.entrypoints.openai.protocol import (
|
|||||||
ToolCall,
|
ToolCall,
|
||||||
)
|
)
|
||||||
from vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
|
from vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
pytestmark = pytest.mark.cpu_test
|
pytestmark = pytest.mark.cpu_test
|
||||||
|
|
||||||
|
|||||||
@ -13,9 +13,8 @@ from vllm.entrypoints.openai.protocol import (
|
|||||||
ToolCall,
|
ToolCall,
|
||||||
)
|
)
|
||||||
from vllm.entrypoints.openai.tool_parsers.xlam_tool_parser import xLAMToolParser
|
from vllm.entrypoints.openai.tool_parsers.xlam_tool_parser import xLAMToolParser
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||||
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
pytestmark = pytest.mark.cpu_test
|
pytestmark = pytest.mark.cpu_test
|
||||||
|
|
||||||
|
|||||||
@ -6,8 +6,8 @@ only get the `eos_token_id` from the tokenizer as defined by
|
|||||||
`vllm.LLMEngine._get_eos_token_id`.
|
`vllm.LLMEngine._get_eos_token_id`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.transformers_utils.config import try_get_generation_config
|
from vllm.transformers_utils.config import try_get_generation_config
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_llama3_eos_token():
|
def test_get_llama3_eos_token():
|
||||||
|
|||||||
@ -44,7 +44,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
|
|||||||
from vllm.entrypoints.cli.serve import ServeSubcommand
|
from vllm.entrypoints.cli.serve import ServeSubcommand
|
||||||
from vllm.model_executor.model_loader import get_model_loader
|
from vllm.model_executor.model_loader import get_model_loader
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||||
from vllm.utils.mem_constants import GB_bytes
|
from vllm.utils.mem_constants import GB_bytes
|
||||||
from vllm.utils.network_utils import get_open_port
|
from vllm.utils.network_utils import get_open_port
|
||||||
|
|||||||
@ -9,7 +9,7 @@ import regex as re
|
|||||||
from openai import BadRequestError
|
from openai import BadRequestError
|
||||||
|
|
||||||
from tests.utils import RemoteOpenAIServer
|
from tests.utils import RemoteOpenAIServer
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
# any model with a chat template should work here
|
# any model with a chat template should work here
|
||||||
MODEL_NAME = "facebook/opt-125m"
|
MODEL_NAME = "facebook/opt-125m"
|
||||||
|
|||||||
@ -14,7 +14,7 @@ import pytest
|
|||||||
|
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from tests.conftest import VllmRunner
|
from tests.conftest import VllmRunner
|
||||||
|
|||||||
@ -47,7 +47,7 @@ from vllm.benchmarks.lib.endpoint_request_func import (
|
|||||||
)
|
)
|
||||||
from vllm.benchmarks.lib.ready_checker import wait_for_endpoint
|
from vllm.benchmarks.lib.ready_checker import wait_for_endpoint
|
||||||
from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
|
from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.utils.gc_utils import freeze_gc_heap
|
from vllm.utils.gc_utils import freeze_gc_heap
|
||||||
from vllm.utils.network_utils import join_host_port
|
from vllm.utils.network_utils import join_host_port
|
||||||
|
|
||||||
|
|||||||
@ -444,7 +444,7 @@ def load_weights_using_from_2_way_softmax(
|
|||||||
)
|
)
|
||||||
loaded_weights = pooling_model_cls.load_weights(model, weights, load_lm_head=True)
|
loaded_weights = pooling_model_cls.load_weights(model, weights, load_lm_head=True)
|
||||||
|
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
tokenizer = get_tokenizer(
|
tokenizer = get_tokenizer(
|
||||||
model_config.tokenizer,
|
model_config.tokenizer,
|
||||||
@ -498,7 +498,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
|
|||||||
# Skip ModelForSequenceClassification in MRO to avoid infinite recursion
|
# Skip ModelForSequenceClassification in MRO to avoid infinite recursion
|
||||||
loaded_weights = type(model).__mro__[1].load_weights(model, weights)
|
loaded_weights = type(model).__mro__[1].load_weights(model, weights)
|
||||||
|
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
tokenizer = get_tokenizer(
|
tokenizer = get_tokenizer(
|
||||||
model_config.tokenizer,
|
model_config.tokenizer,
|
||||||
|
|||||||
@ -45,6 +45,7 @@ from vllm.multimodal.processing import (
|
|||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
from vllm.tokenizers import cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
|
from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
|
||||||
from vllm.transformers_utils.processors.deepseek_ocr import (
|
from vllm.transformers_utils.processors.deepseek_ocr import (
|
||||||
BASE_SIZE,
|
BASE_SIZE,
|
||||||
@ -53,7 +54,6 @@ from vllm.transformers_utils.processors.deepseek_ocr import (
|
|||||||
DeepseekOCRProcessor,
|
DeepseekOCRProcessor,
|
||||||
count_tiles,
|
count_tiles,
|
||||||
)
|
)
|
||||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
|
||||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
from vllm.v1.sample.logits_processor import (
|
from vllm.v1.sample.logits_processor import (
|
||||||
AdapterLogitsProcessor,
|
AdapterLogitsProcessor,
|
||||||
|
|||||||
@ -41,13 +41,13 @@ from vllm.multimodal.processing import (
|
|||||||
)
|
)
|
||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
from vllm.tokenizers import cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.configs.deepseek_vl2 import (
|
from vllm.transformers_utils.configs.deepseek_vl2 import (
|
||||||
DeepseekVLV2Config,
|
DeepseekVLV2Config,
|
||||||
MlpProjectorConfig,
|
MlpProjectorConfig,
|
||||||
VisionEncoderConfig,
|
VisionEncoderConfig,
|
||||||
)
|
)
|
||||||
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
|
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
|
||||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
|
||||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
from vllm.utils.torch_utils import set_default_torch_dtype
|
from vllm.utils.torch_utils import set_default_torch_dtype
|
||||||
|
|
||||||
|
|||||||
@ -59,8 +59,8 @@ from vllm.multimodal.processing import (
|
|||||||
)
|
)
|
||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.transformers_utils.processor import cached_get_processor
|
from vllm.tokenizers import cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.tokenizer import cached_get_tokenizer
|
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
|
|
||||||
from .blip2 import Blip2QFormerModel
|
from .blip2 import Blip2QFormerModel
|
||||||
@ -862,7 +862,7 @@ class GraniteSpeechForConditionalGeneration(
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported task type {task_type}")
|
raise ValueError(f"Unsupported task type {task_type}")
|
||||||
|
|
||||||
tokenizer = cached_get_tokenizer(model_config.model)
|
tokenizer = cached_tokenizer_from_config(model_config)
|
||||||
chat = [dict(role="user", content=user_prompt)]
|
chat = [dict(role="user", content=user_prompt)]
|
||||||
prompt = tokenizer.apply_chat_template(
|
prompt = tokenizer.apply_chat_template(
|
||||||
chat,
|
chat,
|
||||||
@ -886,7 +886,7 @@ class GraniteSpeechForConditionalGeneration(
|
|||||||
model_config: ModelConfig,
|
model_config: ModelConfig,
|
||||||
) -> int | None:
|
) -> int | None:
|
||||||
"""Get the number of audio tokens for an audio duration in sec."""
|
"""Get the number of audio tokens for an audio duration in sec."""
|
||||||
processor = cached_get_processor(model_config.model)
|
processor = cached_processor_from_config(model_config)
|
||||||
hop_length = processor.audio_processor.melspec_kwargs["hop_length"]
|
hop_length = processor.audio_processor.melspec_kwargs["hop_length"]
|
||||||
proj_win_size = processor.audio_processor.projector_window_size
|
proj_win_size = processor.audio_processor.projector_window_size
|
||||||
ds_rate = processor.audio_processor.projector_downsample_rate
|
ds_rate = processor.audio_processor.projector_downsample_rate
|
||||||
|
|||||||
@ -19,7 +19,7 @@ from vllm.model_executor.layers.pooler import (
|
|||||||
)
|
)
|
||||||
from vllm.model_executor.models.llama import LlamaForCausalLM
|
from vllm.model_executor.models.llama import LlamaForCausalLM
|
||||||
from vllm.tasks import PoolingTask
|
from vllm.tasks import PoolingTask
|
||||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
from vllm.tokenizers import cached_tokenizer_from_config
|
||||||
from vllm.v1.outputs import PoolerOutput
|
from vllm.v1.outputs import PoolerOutput
|
||||||
from vllm.v1.pool.metadata import PoolingMetadata
|
from vllm.v1.pool.metadata import PoolingMetadata
|
||||||
|
|
||||||
|
|||||||
@ -73,12 +73,9 @@ from vllm.multimodal.processing import (
|
|||||||
)
|
)
|
||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.configs.radio import RadioConfig
|
from vllm.transformers_utils.configs.radio import RadioConfig
|
||||||
from vllm.transformers_utils.tokenizer import (
|
from vllm.transformers_utils.tokenizer import encode_tokens
|
||||||
cached_tokenizer_from_config,
|
|
||||||
encode_tokens,
|
|
||||||
)
|
|
||||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
|
|
||||||
from .utils import _merge_multimodal_embeddings
|
from .utils import _merge_multimodal_embeddings
|
||||||
|
|||||||
@ -59,8 +59,7 @@ from vllm.multimodal.processing import (
|
|||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.tokenizers import MistralTokenizer
|
from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
|
||||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
|
|
||||||
from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
|
from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
|
||||||
|
|||||||
@ -51,8 +51,7 @@ from vllm.multimodal.processing import (
|
|||||||
)
|
)
|
||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.tokenizers import MistralTokenizer
|
from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
|
||||||
|
|
||||||
from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription
|
from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription
|
||||||
from .utils import init_vllm_registered_model, maybe_prefix
|
from .utils import init_vllm_registered_model, maybe_prefix
|
||||||
|
|||||||
@ -48,7 +48,7 @@ from vllm.multimodal.processing import (
|
|||||||
PromptUpdate,
|
PromptUpdate,
|
||||||
)
|
)
|
||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||||
from vllm.transformers_utils.processor import cached_get_processor
|
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||||
from vllm.utils.jsontree import json_map_leaves
|
from vllm.utils.jsontree import json_map_leaves
|
||||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
from vllm.utils.torch_utils import set_default_torch_dtype
|
from vllm.utils.torch_utils import set_default_torch_dtype
|
||||||
@ -850,7 +850,7 @@ class WhisperForConditionalGeneration(
|
|||||||
def get_speech_to_text_config(
|
def get_speech_to_text_config(
|
||||||
cls, model_config: ModelConfig, task_type: str
|
cls, model_config: ModelConfig, task_type: str
|
||||||
) -> SpeechToTextConfig:
|
) -> SpeechToTextConfig:
|
||||||
processor = cached_get_processor(model_config.model)
|
processor = cached_processor_from_config(model_config)
|
||||||
|
|
||||||
return SpeechToTextConfig(
|
return SpeechToTextConfig(
|
||||||
max_audio_clip_s=processor.feature_extractor.chunk_length,
|
max_audio_clip_s=processor.feature_extractor.chunk_length,
|
||||||
@ -864,7 +864,7 @@ class WhisperForConditionalGeneration(
|
|||||||
stt_config: SpeechToTextConfig,
|
stt_config: SpeechToTextConfig,
|
||||||
model_config: ModelConfig,
|
model_config: ModelConfig,
|
||||||
) -> int | None:
|
) -> int | None:
|
||||||
processor = cached_get_processor(model_config.model)
|
processor = cached_processor_from_config(model_config)
|
||||||
hop_length = processor.feature_extractor.hop_length
|
hop_length = processor.feature_extractor.hop_length
|
||||||
assert hop_length is not None
|
assert hop_length is not None
|
||||||
# NOTE(NickLucche) user can't pass encoder
|
# NOTE(NickLucche) user can't pass encoder
|
||||||
|
|||||||
@ -6,8 +6,7 @@ from typing import TYPE_CHECKING, Generic, Protocol, TypeVar, cast
|
|||||||
|
|
||||||
from vllm.config.multimodal import BaseDummyOptions
|
from vllm.config.multimodal import BaseDummyOptions
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
|
||||||
|
|
||||||
from .cache import BaseMultiModalProcessorCache
|
from .cache import BaseMultiModalProcessorCache
|
||||||
from .processing import (
|
from .processing import (
|
||||||
|
|||||||
@ -4,12 +4,21 @@
|
|||||||
from .hf import HfTokenizer
|
from .hf import HfTokenizer
|
||||||
from .mistral import MistralTokenizer
|
from .mistral import MistralTokenizer
|
||||||
from .protocol import TokenizerLike
|
from .protocol import TokenizerLike
|
||||||
from .registry import TokenizerRegistry, get_tokenizer
|
from .registry import (
|
||||||
|
TokenizerRegistry,
|
||||||
|
cached_get_tokenizer,
|
||||||
|
cached_tokenizer_from_config,
|
||||||
|
get_tokenizer,
|
||||||
|
init_tokenizer_from_config,
|
||||||
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"TokenizerLike",
|
"TokenizerLike",
|
||||||
"HfTokenizer",
|
"HfTokenizer",
|
||||||
"MistralTokenizer",
|
"MistralTokenizer",
|
||||||
"TokenizerRegistry",
|
"TokenizerRegistry",
|
||||||
|
"cached_get_tokenizer",
|
||||||
"get_tokenizer",
|
"get_tokenizer",
|
||||||
|
"cached_tokenizer_from_config",
|
||||||
|
"init_tokenizer_from_config",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -2,10 +2,12 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
import importlib.util
|
import importlib.util
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TypeVar, overload
|
from typing import TYPE_CHECKING, TypeVar, overload
|
||||||
|
|
||||||
import huggingface_hub
|
import huggingface_hub
|
||||||
|
from typing_extensions import assert_never
|
||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
@ -21,6 +23,9 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
|
|||||||
|
|
||||||
from .protocol import TokenizerLike
|
from .protocol import TokenizerLike
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from vllm.config import ModelConfig
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
_T = TypeVar("_T", bound=type[TokenizerLike])
|
_T = TypeVar("_T", bound=type[TokenizerLike])
|
||||||
@ -195,3 +200,34 @@ def get_tokenizer(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return tokenizer
|
return tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
cached_get_tokenizer = lru_cache(get_tokenizer)
|
||||||
|
|
||||||
|
|
||||||
|
def cached_tokenizer_from_config(model_config: "ModelConfig", **kwargs):
|
||||||
|
return cached_get_tokenizer(
|
||||||
|
model_config.tokenizer,
|
||||||
|
tokenizer_mode=model_config.tokenizer_mode,
|
||||||
|
revision=model_config.tokenizer_revision,
|
||||||
|
trust_remote_code=model_config.trust_remote_code,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def init_tokenizer_from_config(model_config: "ModelConfig"):
|
||||||
|
runner_type = model_config.runner_type
|
||||||
|
if runner_type == "generate" or runner_type == "draft":
|
||||||
|
truncation_side = "left"
|
||||||
|
elif runner_type == "pooling":
|
||||||
|
truncation_side = "right"
|
||||||
|
else:
|
||||||
|
assert_never(runner_type)
|
||||||
|
|
||||||
|
return get_tokenizer(
|
||||||
|
model_config.tokenizer,
|
||||||
|
tokenizer_mode=model_config.tokenizer_mode,
|
||||||
|
trust_remote_code=model_config.trust_remote_code,
|
||||||
|
revision=model_config.tokenizer_revision,
|
||||||
|
truncation_side=truncation_side,
|
||||||
|
)
|
||||||
|
|||||||
@ -2,17 +2,10 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
from functools import lru_cache
|
from typing import Any
|
||||||
from typing import TYPE_CHECKING, Any
|
|
||||||
|
|
||||||
from typing_extensions import assert_never
|
|
||||||
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
from vllm.tokenizers import TokenizerLike
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from vllm.config import ModelConfig
|
|
||||||
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
@ -28,18 +21,54 @@ def __getattr__(name: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
return TokenizerLike
|
return TokenizerLike
|
||||||
if name == "get_cached_tokenizer":
|
if name == "get_tokenizer":
|
||||||
from vllm.tokenizers.hf import get_cached_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
|
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"`vllm.transformers_utils.tokenizer.get_cached_tokenizer` "
|
"`vllm.transformers_utils.tokenizer.get_tokenizer` "
|
||||||
"has been moved to `vllm.tokenizers.hf.get_cached_tokenizer`. "
|
"has been moved to `vllm.tokenizers.get_tokenizer`. "
|
||||||
"The old name will be removed in v0.13.",
|
"The old name will be removed in v0.13.",
|
||||||
DeprecationWarning,
|
DeprecationWarning,
|
||||||
stacklevel=2,
|
stacklevel=2,
|
||||||
)
|
)
|
||||||
|
|
||||||
return get_cached_tokenizer
|
return get_tokenizer
|
||||||
|
if name == "cached_get_tokenizer":
|
||||||
|
from vllm.tokenizers import cached_get_tokenizer
|
||||||
|
|
||||||
|
warnings.warn(
|
||||||
|
"`vllm.transformers_utils.tokenizer.cached_get_tokenizer` "
|
||||||
|
"has been moved to `vllm.tokenizers.cached_get_tokenizer`. "
|
||||||
|
"The old name will be removed in v0.13.",
|
||||||
|
DeprecationWarning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
return cached_get_tokenizer
|
||||||
|
if name == "cached_tokenizer_from_config":
|
||||||
|
from vllm.tokenizers import cached_tokenizer_from_config
|
||||||
|
|
||||||
|
warnings.warn(
|
||||||
|
"`vllm.transformers_utils.tokenizer.cached_tokenizer_from_config` "
|
||||||
|
"has been moved to `vllm.tokenizers.cached_tokenizer_from_config`. "
|
||||||
|
"The old name will be removed in v0.13.",
|
||||||
|
DeprecationWarning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
return cached_tokenizer_from_config
|
||||||
|
if name == "init_tokenizer_from_configs":
|
||||||
|
from vllm.tokenizers import init_tokenizer_from_config
|
||||||
|
|
||||||
|
warnings.warn(
|
||||||
|
"`vllm.transformers_utils.tokenizer.init_tokenizer_from_configs` "
|
||||||
|
"has been moved to `vllm.tokenizers.init_tokenizer_from_config`. "
|
||||||
|
"The old name will be removed in v0.13.",
|
||||||
|
DeprecationWarning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
return init_tokenizer_from_config
|
||||||
|
|
||||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||||
|
|
||||||
@ -92,37 +121,3 @@ def encode_tokens(
|
|||||||
kw_args["add_special_tokens"] = add_special_tokens
|
kw_args["add_special_tokens"] = add_special_tokens
|
||||||
|
|
||||||
return tokenizer.encode(text, **kw_args)
|
return tokenizer.encode(text, **kw_args)
|
||||||
|
|
||||||
|
|
||||||
cached_get_tokenizer = lru_cache(get_tokenizer)
|
|
||||||
|
|
||||||
|
|
||||||
def cached_tokenizer_from_config(
|
|
||||||
model_config: "ModelConfig",
|
|
||||||
**kwargs: Any,
|
|
||||||
):
|
|
||||||
return cached_get_tokenizer(
|
|
||||||
model_config.tokenizer,
|
|
||||||
tokenizer_mode=model_config.tokenizer_mode,
|
|
||||||
revision=model_config.tokenizer_revision,
|
|
||||||
trust_remote_code=model_config.trust_remote_code,
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def init_tokenizer_from_configs(model_config: "ModelConfig"):
|
|
||||||
runner_type = model_config.runner_type
|
|
||||||
if runner_type == "generate" or runner_type == "draft":
|
|
||||||
truncation_side = "left"
|
|
||||||
elif runner_type == "pooling":
|
|
||||||
truncation_side = "right"
|
|
||||||
else:
|
|
||||||
assert_never(runner_type)
|
|
||||||
|
|
||||||
return get_tokenizer(
|
|
||||||
model_config.tokenizer,
|
|
||||||
tokenizer_mode=model_config.tokenizer_mode,
|
|
||||||
trust_remote_code=model_config.trust_remote_code,
|
|
||||||
revision=model_config.tokenizer_revision,
|
|
||||||
truncation_side=truncation_side,
|
|
||||||
)
|
|
||||||
|
|||||||
@ -26,10 +26,9 @@ from vllm.plugins.io_processors import get_io_processor
|
|||||||
from vllm.pooling_params import PoolingParams
|
from vllm.pooling_params import PoolingParams
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.tasks import SupportedTask
|
from vllm.tasks import SupportedTask
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, init_tokenizer_from_config
|
||||||
from vllm.tracing import init_tracer
|
from vllm.tracing import init_tracer
|
||||||
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
|
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
|
||||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils.async_utils import cancel_task_threadsafe
|
from vllm.utils.async_utils import cancel_task_threadsafe
|
||||||
from vllm.utils.collection_utils import as_list
|
from vllm.utils.collection_utils import as_list
|
||||||
@ -112,7 +111,7 @@ class AsyncLLM(EngineClient):
|
|||||||
if self.model_config.skip_tokenizer_init:
|
if self.model_config.skip_tokenizer_init:
|
||||||
tokenizer = None
|
tokenizer = None
|
||||||
else:
|
else:
|
||||||
tokenizer = init_tokenizer_from_configs(self.model_config)
|
tokenizer = init_tokenizer_from_config(self.model_config)
|
||||||
|
|
||||||
self.input_processor = InputProcessor(self.vllm_config, tokenizer)
|
self.input_processor = InputProcessor(self.vllm_config, tokenizer)
|
||||||
self.io_processor = get_io_processor(
|
self.io_processor = get_io_processor(
|
||||||
|
|||||||
@ -23,9 +23,8 @@ from vllm.plugins.io_processors import get_io_processor
|
|||||||
from vllm.pooling_params import PoolingParams
|
from vllm.pooling_params import PoolingParams
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.tasks import SupportedTask
|
from vllm.tasks import SupportedTask
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike, init_tokenizer_from_config
|
||||||
from vllm.tracing import init_tracer
|
from vllm.tracing import init_tracer
|
||||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.v1.engine import EngineCoreRequest
|
from vllm.v1.engine import EngineCoreRequest
|
||||||
from vllm.v1.engine.core_client import EngineCoreClient
|
from vllm.v1.engine.core_client import EngineCoreClient
|
||||||
@ -87,7 +86,7 @@ class LLMEngine:
|
|||||||
if self.model_config.skip_tokenizer_init:
|
if self.model_config.skip_tokenizer_init:
|
||||||
tokenizer = None
|
tokenizer = None
|
||||||
else:
|
else:
|
||||||
tokenizer = init_tokenizer_from_configs(self.model_config)
|
tokenizer = init_tokenizer_from_config(self.model_config)
|
||||||
|
|
||||||
self.input_processor = InputProcessor(self.vllm_config, tokenizer)
|
self.input_processor = InputProcessor(self.vllm_config, tokenizer)
|
||||||
self.io_processor = get_io_processor(
|
self.io_processor = get_io_processor(
|
||||||
|
|||||||
@ -7,7 +7,7 @@ from typing import TYPE_CHECKING
|
|||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.reasoning import ReasoningParserManager
|
from vllm.reasoning import ReasoningParserManager
|
||||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
from vllm.tokenizers import init_tokenizer_from_config
|
||||||
from vllm.utils.import_utils import LazyLoader
|
from vllm.utils.import_utils import LazyLoader
|
||||||
from vllm.v1.structured_output.backend_guidance import GuidanceBackend
|
from vllm.v1.structured_output.backend_guidance import GuidanceBackend
|
||||||
from vllm.v1.structured_output.backend_types import (
|
from vllm.v1.structured_output.backend_types import (
|
||||||
@ -61,7 +61,7 @@ class StructuredOutputManager:
|
|||||||
# of CPUs.
|
# of CPUs.
|
||||||
max_workers = max(1, (multiprocessing.cpu_count() + 1) // 2)
|
max_workers = max(1, (multiprocessing.cpu_count() + 1) // 2)
|
||||||
self.executor = ThreadPoolExecutor(max_workers=max_workers)
|
self.executor = ThreadPoolExecutor(max_workers=max_workers)
|
||||||
self.tokenizer = init_tokenizer_from_configs(
|
self.tokenizer = init_tokenizer_from_config(
|
||||||
model_config=self.vllm_config.model_config
|
model_config=self.vllm_config.model_config
|
||||||
)
|
)
|
||||||
reasoning_parser = (
|
reasoning_parser = (
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user