From 8c946cecca72ac9c05ab17dd4ffb51ecd2094074 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 13 May 2025 12:34:37 +0100 Subject: [PATCH] Update deprecated type hinting in `vllm/transformers_utils` (#18058) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/transformers_utils/config.py | 10 ++-- vllm/transformers_utils/configs/arctic.py | 6 +-- vllm/transformers_utils/configs/cohere2.py | 6 +-- .../configs/deepseek_vl2.py | 5 +- vllm/transformers_utils/configs/exaone.py | 4 +- vllm/transformers_utils/configs/jais.py | 4 +- .../configs/mlp_speculator.py | 6 +-- vllm/transformers_utils/configs/mpt.py | 18 ++++---- vllm/transformers_utils/configs/solar.py | 2 +- vllm/transformers_utils/configs/ultravox.py | 10 ++-- vllm/transformers_utils/detokenizer.py | 6 +-- vllm/transformers_utils/detokenizer_utils.py | 24 +++++----- .../processors/deepseek_vl2.py | 29 ++++++------ vllm/transformers_utils/processors/ovis.py | 12 ++--- vllm/transformers_utils/tokenizer_group.py | 8 ++-- vllm/transformers_utils/tokenizers/mistral.py | 46 +++++++++---------- vllm/transformers_utils/utils.py | 4 +- 17 files changed, 98 insertions(+), 102 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index f6c2b35535b6d..02034bf02ec9e 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -6,7 +6,7 @@ import os import time from functools import cache from pathlib import Path -from typing import Any, Callable, Dict, Literal, Optional, Type, Union +from typing import Any, Callable, Literal, Optional, Union import huggingface_hub from huggingface_hub import hf_hub_download @@ -55,11 +55,11 @@ HF_TOKEN = os.getenv('HF_TOKEN', None) logger = init_logger(__name__) -_CONFIG_REGISTRY_OVERRIDE_HF: Dict[str, Type[PretrainedConfig]] = { +_CONFIG_REGISTRY_OVERRIDE_HF: dict[str, type[PretrainedConfig]] = { "mllama": MllamaConfig } -_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = { +_CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = { "chatglm": ChatGLMConfig, "cohere2": Cohere2Config, "dbrx": DbrxConfig, @@ -199,7 +199,7 @@ def patch_rope_scaling(config: PretrainedConfig) -> None: patch_rope_scaling_dict(rope_scaling) -def patch_rope_scaling_dict(rope_scaling: Dict[str, Any]) -> None: +def patch_rope_scaling_dict(rope_scaling: dict[str, Any]) -> None: if "rope_type" in rope_scaling and "type" in rope_scaling: rope_type = rope_scaling["rope_type"] rope_type_legacy = rope_scaling["type"] @@ -748,7 +748,7 @@ def get_hf_image_processor_config( hf_token: Optional[Union[bool, str]] = None, revision: Optional[str] = None, **kwargs, -) -> Dict[str, Any]: +) -> dict[str, Any]: # ModelScope does not provide an interface for image_processor if VLLM_USE_MODELSCOPE: return dict() diff --git a/vllm/transformers_utils/configs/arctic.py b/vllm/transformers_utils/configs/arctic.py index 5ab70c0e41362..2261f0a9e9aac 100644 --- a/vllm/transformers_utils/configs/arctic.py +++ b/vllm/transformers_utils/configs/arctic.py @@ -8,7 +8,7 @@ """ Arctic model configuration""" from dataclasses import asdict, dataclass -from typing import Any, Dict +from typing import Any from transformers.configuration_utils import PretrainedConfig from transformers.utils import logging @@ -192,14 +192,14 @@ class ArcticConfig(PretrainedConfig): ) @classmethod - def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "ArcticConfig": + def from_dict(cls, config_dict: dict[str, Any], **kwargs) -> 
"ArcticConfig": result = super().from_dict(config_dict, **kwargs) config = result[0] if isinstance(result, tuple) else result if isinstance(config.quantization, dict): config.quantization = ArcticQuantizationConfig(**config.quantization) return result - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: ret = super().to_dict() if isinstance(ret["quantization"], ArcticQuantizationConfig): ret["quantization"] = asdict(ret["quantization"]) diff --git a/vllm/transformers_utils/configs/cohere2.py b/vllm/transformers_utils/configs/cohere2.py index e30409b3af5f0..21328d7675b82 100644 --- a/vllm/transformers_utils/configs/cohere2.py +++ b/vllm/transformers_utils/configs/cohere2.py @@ -61,7 +61,7 @@ class Cohere2Config(PretrainedConfig): Whether to tie weight embeddings rope_theta (`float`, *optional*, defaults to 10000.0): The base period of the RoPE embeddings. - rope_scaling (`Dict`, *optional*): + rope_scaling (`dict`, *optional*): Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value accordingly. @@ -86,11 +86,11 @@ class Cohere2Config(PretrainedConfig): `beta_slow` (`float`, *optional*): Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear ramp function. If unspecified, it defaults to 1. - `short_factor` (`List[float]`, *optional*): + `short_factor` (`list[float]`, *optional*): Only used with 'longrope'. The scaling factor to be applied to short contexts (< `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden size divided by the number of attention heads divided by 2 - `long_factor` (`List[float]`, *optional*): + `long_factor` (`list[float]`, *optional*): Only used with 'longrope'. The scaling factor to be applied to long contexts (< `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden size divided by the number of attention heads divided by 2 diff --git a/vllm/transformers_utils/configs/deepseek_vl2.py b/vllm/transformers_utils/configs/deepseek_vl2.py index 24d4052d87211..a54486fa41cd1 100644 --- a/vllm/transformers_utils/configs/deepseek_vl2.py +++ b/vllm/transformers_utils/configs/deepseek_vl2.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # adapted from https://github.com/deepseek-ai/DeepSeek-VL2/blob/faf18023f24b962b32d9f0a2d89e402a8d383a78/deepseek_vl2/models/modeling_deepseek_vl_v2.py#L115-L268 -from typing import Tuple from transformers.configuration_utils import PretrainedConfig @@ -191,12 +190,12 @@ class DeepseekVLV2Config(PretrainedConfig): tile_tag: str = "2D" global_view_pos: str = "head" - candidate_resolutions: Tuple[Tuple[int, int]] = ((384, 384), ) + candidate_resolutions: tuple[tuple[int, int]] = ((384, 384), ) def __init__(self, tile_tag: str = "tile_tag", global_view_pos: str = "head", - candidate_resolutions: Tuple[Tuple[int, + candidate_resolutions: tuple[tuple[int, int]] = ((384, 384), ), **kwargs): super().__init__(**kwargs) diff --git a/vllm/transformers_utils/configs/exaone.py b/vllm/transformers_utils/configs/exaone.py index 8181604191a19..25bafbb85d306 100644 --- a/vllm/transformers_utils/configs/exaone.py +++ b/vllm/transformers_utils/configs/exaone.py @@ -17,14 +17,12 @@ # limitations under the License. 
"""Exaone model configuration""" -from typing import Dict - from transformers.configuration_utils import PretrainedConfig from transformers.utils import logging logger = logging.get_logger(__name__) -EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP: Dict[str, str] = {} +EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP: dict[str, str] = {} class ExaoneConfig(PretrainedConfig): diff --git a/vllm/transformers_utils/configs/jais.py b/vllm/transformers_utils/configs/jais.py index be0f3b7e5e529..b947c6a9e2b4b 100644 --- a/vllm/transformers_utils/configs/jais.py +++ b/vllm/transformers_utils/configs/jais.py @@ -98,7 +98,7 @@ class JAISConfig(PretrainedConfig): Scale attention weights by dividing by hidden_size instead of sqrt(hidden_size). Need to set scale_attn_weights to `True` as well. - alibi_scaling (`Dict`, *optional*): + alibi_scaling (`dict`, *optional*): Dictionary containing the scaling configuration for ALiBi embeddings. Currently only supports linear scaling strategy. Can specify either the scaling `factor` (must be @@ -108,7 +108,7 @@ class JAISConfig(PretrainedConfig): formats are `{"type": strategy name, "factor": scaling factor}` or `{"type": strategy name, "train_seq_len": training sequence length}`. - architectures (`List`, *optional*, defaults to ['JAISLMHeadModel']): + architectures (`list`, *optional*, defaults to ['JAISLMHeadModel']): architecture names for Jais. Example: diff --git a/vllm/transformers_utils/configs/mlp_speculator.py b/vllm/transformers_utils/configs/mlp_speculator.py index c761f659e5b2c..70f60752905cb 100644 --- a/vllm/transformers_utils/configs/mlp_speculator.py +++ b/vllm/transformers_utils/configs/mlp_speculator.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -from typing import List, Optional +from typing import Optional from transformers import PretrainedConfig @@ -17,7 +17,7 @@ class MLPSpeculatorConfig(PretrainedConfig): emb_dim: int = 4096, inner_dim: int = 0, n_predict: int = 3, - top_k_tokens_per_head: Optional[List[int]] = None, + top_k_tokens_per_head: Optional[list[int]] = None, n_candidates: int = 5, tie_weights: bool = False, scale_input: bool = False, @@ -34,7 +34,7 @@ class MLPSpeculatorConfig(PretrainedConfig): the inner dimension of the model. If 0, will be the emb_dim. n_predict: int the number of lookaheads for the speculator - top_k_tokens_per_head: List[int] + top_k_tokens_per_head: list[int] Number of tokens to consider from each head when forming the candidate tree. 
For each candidate branch in the tree, head n produces topk[n] diff --git a/vllm/transformers_utils/configs/mpt.py b/vllm/transformers_utils/configs/mpt.py index 96356135f6b28..2d52658d3973c 100644 --- a/vllm/transformers_utils/configs/mpt.py +++ b/vllm/transformers_utils/configs/mpt.py @@ -4,11 +4,11 @@ # https://huggingface.co/mosaicml/mpt-7b/blob/main/configuration_mpt.py """A HuggingFace-style model configuration.""" import warnings -from typing import Any, Dict, Optional, Union +from typing import Any, Optional, Union from transformers import PretrainedConfig -attn_config_defaults: Dict = { +attn_config_defaults: dict = { 'attn_type': 'multihead_attention', 'attn_pdrop': 0.0, 'attn_impl': 'triton', @@ -20,8 +20,8 @@ attn_config_defaults: Dict = { 'alibi': False, 'alibi_bias_max': 8 } -ffn_config_defaults: Dict = {'ffn_type': 'mptmlp'} -init_config_defaults: Dict = { +ffn_config_defaults: dict = {'ffn_type': 'mptmlp'} +init_config_defaults: dict = { 'name': 'kaiming_normal_', 'fan_mode': 'fan_in', 'init_nonlinearity': 'relu', @@ -52,15 +52,15 @@ class MPTConfig(PretrainedConfig): resid_pdrop: float = 0.0, emb_pdrop: float = 0.0, learned_pos_emb: bool = True, - attn_config: Dict = attn_config_defaults, - ffn_config: Dict = ffn_config_defaults, + attn_config: dict = attn_config_defaults, + ffn_config: dict = ffn_config_defaults, init_device: str = 'cpu', logit_scale: Optional[Union[float, str]] = None, no_bias: bool = False, embedding_fraction: float = 1.0, norm_type: str = 'low_precision_layernorm', use_cache: bool = False, - init_config: Dict = init_config_defaults, + init_config: dict = init_config_defaults, fc_type: str = 'torch', verbose: Optional[int] = None, **kwargs: Any): @@ -102,8 +102,8 @@ class MPTConfig(PretrainedConfig): self._validate_config() def _set_config_defaults( - self, config: Dict[str, Any], - config_defaults: Dict[str, Any]) -> Dict[str, Any]: + self, config: dict[str, Any], + config_defaults: dict[str, Any]) -> dict[str, Any]: for (k, v) in config_defaults.items(): if k not in config: config[k] = v diff --git a/vllm/transformers_utils/configs/solar.py b/vllm/transformers_utils/configs/solar.py index 0d5db896b93d3..6eaf699d17bee 100644 --- a/vllm/transformers_utils/configs/solar.py +++ b/vllm/transformers_utils/configs/solar.py @@ -108,7 +108,7 @@ class SolarConfig(PretrainedConfig): Whether to tie weight embeddings rope_theta (`float`, *optional*, defaults to 10000.0): The base period of the RoPE embeddings. - rope_scaling (`Dict`, *optional*): + rope_scaling (`dict`, *optional*): Dictionary containing the scaling configuration for the RoPE embeddings. 
Currently supports two scaling diff --git a/vllm/transformers_utils/configs/ultravox.py b/vllm/transformers_utils/configs/ultravox.py index 6b2765db94e78..4c50724272634 100644 --- a/vllm/transformers_utils/configs/ultravox.py +++ b/vllm/transformers_utils/configs/ultravox.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # Adapted from https://github.com/fixie-ai/ultravox/blob/ecd58c4041030bae2ad15aa6bcf04ab43199ea02/ultravox/model/ultravox_config.py -from typing import Any, Dict, Optional +from typing import Any, Optional import transformers @@ -48,8 +48,8 @@ class UltravoxConfig(transformers.PretrainedConfig): def __init__( self, - audio_config: Optional[Dict[str, Any]] = None, - text_config: Optional[Dict[str, Any]] = None, + audio_config: Optional[dict[str, Any]] = None, + text_config: Optional[dict[str, Any]] = None, audio_model_id: Optional[str] = None, text_model_id: Optional[str] = None, ignore_index: int = -100, @@ -58,8 +58,8 @@ class UltravoxConfig(transformers.PretrainedConfig): stack_factor: int = 8, norm_init: float = 0.4, projector_act: str = "swiglu", - text_model_lora_config: Optional[Dict[str, Any]] = None, - audio_model_lora_config: Optional[Dict[str, Any]] = None, + text_model_lora_config: Optional[dict[str, Any]] = None, + audio_model_lora_config: Optional[dict[str, Any]] = None, projector_ln_mid: bool = False, **kwargs, ): diff --git a/vllm/transformers_utils/detokenizer.py b/vllm/transformers_utils/detokenizer.py index 991d5631e64e3..3adf2e32cca7c 100644 --- a/vllm/transformers_utils/detokenizer.py +++ b/vllm/transformers_utils/detokenizer.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -from typing import Dict, List, Optional +from typing import Optional from vllm.sequence import (VLLM_INVALID_TOKEN_ID, Logprob, SamplingParams, Sequence, SequenceGroup) @@ -22,7 +22,7 @@ class Detokenizer: return self.tokenizer_group.get_lora_tokenizer(sequence.lora_request) def decode_prompt_logprobs_inplace(self, seq_group: SequenceGroup, - prompt_logprobs: List[Optional[Dict[ + prompt_logprobs: list[Optional[dict[ int, Logprob]]], position_offset: int) -> None: """Decodes the logprobs for the prompt of a sequence group. @@ -49,7 +49,7 @@ class Detokenizer: read_offset = 0 next_iter_prefix_offset = 0 next_iter_read_offset = 0 - next_iter_tokens: List[str] = [] + next_iter_tokens: list[str] = [] prev_tokens = None for token_position_in_logprob, prompt_logprobs_for_token in enumerate( diff --git a/vllm/transformers_utils/detokenizer_utils.py b/vllm/transformers_utils/detokenizer_utils.py index a1fa27773fe5c..7373fa0ede237 100644 --- a/vllm/transformers_utils/detokenizer_utils.py +++ b/vllm/transformers_utils/detokenizer_utils.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 -from typing import List, Optional, Tuple +from typing import Optional from .tokenizer import AnyTokenizer -def _replace_none_with_empty(tokens: List[Optional[str]]): +def _replace_none_with_empty(tokens: list[Optional[str]]): for i, token in enumerate(tokens): if token is None: tokens[i] = "" @@ -13,7 +13,7 @@ def _replace_none_with_empty(tokens: List[Optional[str]]): def _convert_tokens_to_string_with_added_encoders( tokenizer: AnyTokenizer, - output_tokens: List[str], + output_tokens: list[str], skip_special_tokens: bool, spaces_between_special_tokens: bool, ) -> str: @@ -22,8 +22,8 @@ def _convert_tokens_to_string_with_added_encoders( # NOTE(woosuk): The following code is slow because it runs a for loop over # the output_tokens. 
In Python, running a for loop over a list can be slow # even when the loop body is very simple. - sub_texts: List[str] = [] - current_sub_text: List[str] = [] + sub_texts: list[str] = [] + current_sub_text: list[str] = [] all_special_tokens = set(tokenizer.all_special_tokens) for token in output_tokens: if skip_special_tokens and token in all_special_tokens: @@ -52,9 +52,9 @@ INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET = 5 def convert_prompt_ids_to_tokens( tokenizer: AnyTokenizer, - prompt_ids: List[int], + prompt_ids: list[int], skip_special_tokens: bool = False, -) -> Tuple[List[str], int, int]: +) -> tuple[list[str], int, int]: """Converts the prompt ids to tokens and returns the tokens and offsets for incremental detokenization. @@ -76,8 +76,8 @@ def convert_prompt_ids_to_tokens( def convert_ids_list_to_tokens( tokenizer: AnyTokenizer, - token_ids: List[int], -) -> List[str]: + token_ids: list[int], +) -> list[str]: """Detokenize the input ids individually. Args: @@ -98,13 +98,13 @@ def convert_ids_list_to_tokens( # under Apache 2.0 license def detokenize_incrementally( tokenizer: AnyTokenizer, - all_input_ids: List[int], - prev_tokens: Optional[List[str]], + all_input_ids: list[int], + prev_tokens: Optional[list[str]], prefix_offset: int, read_offset: int, skip_special_tokens: bool = False, spaces_between_special_tokens: bool = True, -) -> Tuple[List[str], str, int, int]: +) -> tuple[list[str], str, int, int]: """Detokenizes the input ids incrementally and returns the new tokens and the new text. diff --git a/vllm/transformers_utils/processors/deepseek_vl2.py b/vllm/transformers_utils/processors/deepseek_vl2.py index 316281f2af4e5..df960e9c7aa8f 100644 --- a/vllm/transformers_utils/processors/deepseek_vl2.py +++ b/vllm/transformers_utils/processors/deepseek_vl2.py @@ -24,7 +24,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
import math -from typing import List, Tuple import torch import torchvision.transforms as T @@ -36,8 +35,8 @@ from transformers.processing_utils import ProcessorMixin class ImageTransform: def __init__(self, - mean: Tuple[float, float, float] = (0.5, 0.5, 0.5), - std: Tuple[float, float, float] = (0.5, 0.5, 0.5), + mean: tuple[float, float, float] = (0.5, 0.5, 0.5), + std: tuple[float, float, float] = (0.5, 0.5, 0.5), normalize: bool = True): self.mean = mean self.std = std @@ -62,11 +61,11 @@ class DeepseekVLV2Processor(ProcessorMixin): def __init__( self, tokenizer: LlamaTokenizerFast, - candidate_resolutions: Tuple[Tuple[int, int]], + candidate_resolutions: tuple[tuple[int, int]], patch_size: int, downsample_ratio: int, - image_mean: Tuple[float, float, float] = (0.5, 0.5, 0.5), - image_std: Tuple[float, float, float] = (0.5, 0.5, 0.5), + image_mean: tuple[float, float, float] = (0.5, 0.5, 0.5), + image_std: tuple[float, float, float] = (0.5, 0.5, 0.5), normalize: bool = True, image_token: str = "", pad_token: str = "<|▁pad▁|>", @@ -170,13 +169,13 @@ class DeepseekVLV2Processor(ProcessorMixin): return t - def decode(self, t: List[int], **kwargs) -> str: + def decode(self, t: list[int], **kwargs) -> str: return self.tokenizer.decode(t, **kwargs) def process_one( self, prompt: str, - images: List[Image.Image], + images: list[Image.Image], inference_mode: bool = True, **kwargs, ): @@ -184,8 +183,8 @@ class DeepseekVLV2Processor(ProcessorMixin): Args: prompt (str): the formatted prompt; - conversations (List[Dict]): conversations with a list of messages; - images (List[ImageType]): the list of images; + conversations (list[dict]): conversations with a list of messages; + images (list[ImageType]): the list of images; inference_mode (bool): if True, then remove the last eos token; system_prompt (str): the system prompt; **kwargs: @@ -196,7 +195,7 @@ class DeepseekVLV2Processor(ProcessorMixin): - target_ids (torch.LongTensor): [N + image tokens] - pixel_values (torch.FloatTensor): [n_patches, 3, H, W] - image_id (int): the id of the image token - - num_image_tokens (List[int]): the number of image tokens + - num_image_tokens (list[int]): the number of image tokens """ assert (prompt is not None and images is not None @@ -257,7 +256,7 @@ class DeepseekVLV2Processor(ProcessorMixin): self, *, prompt: str, - images: List[Image.Image], + images: list[Image.Image], inference_mode: bool = True, **kwargs, ): @@ -265,7 +264,7 @@ class DeepseekVLV2Processor(ProcessorMixin): Args: prompt (str): the formatted prompt; - images (List[ImageType]): the list of images; + images (list[ImageType]): the list of images; inference_mode (bool): if True, then remove the last eos token; **kwargs: @@ -274,7 +273,7 @@ class DeepseekVLV2Processor(ProcessorMixin): - input_ids (torch.LongTensor): [N + image tokens] - images (torch.FloatTensor): [n_images, 3, H, W] - image_id (int): the id of the image token - - num_image_tokens (List[int]): the number of image tokens + - num_image_tokens (list[int]): the number of image tokens """ prepare = self.process_one( @@ -288,7 +287,7 @@ class DeepseekVLV2Processor(ProcessorMixin): def tokenize_with_images( self, conversation: str, - images: List[Image.Image], + images: list[Image.Image], bos: bool = True, eos: bool = True, cropping: bool = True, diff --git a/vllm/transformers_utils/processors/ovis.py b/vllm/transformers_utils/processors/ovis.py index 48e786792cf51..a35d32999991d 100644 --- a/vllm/transformers_utils/processors/ovis.py +++ b/vllm/transformers_utils/processors/ovis.py 
@@ -23,7 +23,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from functools import cached_property -from typing import List, Union +from typing import Union import PIL import torch @@ -102,7 +102,7 @@ class OvisProcessor(ProcessorMixin): def __call__( self, images: ImageInput = None, - text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, + text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, **kwargs: Unpack[OvisProcessorKwargs], ) -> BatchFeature: """ @@ -111,14 +111,14 @@ class OvisProcessor(ProcessorMixin): the text. To prepare the vision inputs, this method forwards the `vision_infos` and `kwrags` arguments to Qwen2VLImageProcessor's [`~Qwen2VLImageProcessor.__call__`] if `vision_infos` is not `None`. Args: - images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`): + images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `list[PIL.Image.Image]`, `list[np.ndarray]`, `list[torch.Tensor]`): The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch tensor. Both channels-first and channels-last formats are supported. - text (`str`, `List[str]`, `List[List[str]]`): + text (`str`, `list[str]`, `list[list[str]]`): The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). - videos (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`): + videos (`np.ndarray`, `torch.Tensor`, `list[np.ndarray]`, `list[torch.Tensor]`): The image or batch of videos to be prepared. Each video can be a 4D NumPy array or PyTorch tensor, or a nested list of 3D frames. Both channels-first and channels-last formats are supported. return_tensors (`str` or [`~utils.TensorType`], *optional*): @@ -400,7 +400,7 @@ class OvisProcessor(ProcessorMixin): The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)` or `(sequence_length,)`. Returns: - `List[str]`: The decoded text. + `list[str]`: The decoded text. 
""" return self.tokenizer.batch_decode( generated_outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False diff --git a/vllm/transformers_utils/tokenizer_group.py b/vllm/transformers_utils/tokenizer_group.py index aff2d2eb1c357..8b9e4881ef88f 100644 --- a/vllm/transformers_utils/tokenizer_group.py +++ b/vllm/transformers_utils/tokenizer_group.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -from typing import List, Optional +from typing import Optional from vllm.config import LoRAConfig, ModelConfig, SchedulerConfig from vllm.lora.request import LoRARequest @@ -32,7 +32,7 @@ class TokenizerGroup: return self.max_input_length def _raise_if_input_too_long(self, - encoded_tokens: List[int], + encoded_tokens: list[int], lora_request: Optional[LoRARequest] = None): input_length = len(encoded_tokens) if lora_request: @@ -48,7 +48,7 @@ class TokenizerGroup: max_length: Optional[int] = None, truncation: Optional[bool] = None, lora_request: Optional[LoRARequest] = None, - add_special_tokens: Optional[bool] = None) -> List[int]: + add_special_tokens: Optional[bool] = None) -> list[int]: tokenizer = self.get_lora_tokenizer(lora_request) ret = encode_tokens(tokenizer, @@ -65,7 +65,7 @@ class TokenizerGroup: max_length: Optional[int] = None, truncation: Optional[bool] = None, lora_request: Optional[LoRARequest] = None, - add_special_tokens: Optional[bool] = None) -> List[int]: + add_special_tokens: Optional[bool] = None) -> list[int]: tokenizer = await self.get_lora_tokenizer_async(lora_request) ret = encode_tokens(tokenizer, prompt, diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 3db7a0a5c5c15..551c2d55b4fc6 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -4,7 +4,7 @@ import os import re from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Optional, Union, cast import huggingface_hub from huggingface_hub import HfApi, hf_hub_download @@ -28,7 +28,7 @@ logger = init_logger(__name__) @dataclass class Encoding: - input_ids: Union[List[int], List[List[int]]] + input_ids: Union[list[int], list[list[int]]] def maybe_serialize_tool_calls(request: "ChatCompletionRequest"): @@ -105,7 +105,7 @@ def validate_request_params(request: "ChatCompletionRequest"): "for Mistral tokenizers.") -def list_local_repo_files(repo_id: str, revision: Optional[str]) -> List[str]: +def list_local_repo_files(repo_id: str, revision: Optional[str]) -> list[str]: repo_cache = os.path.join( huggingface_hub.constants.HF_HUB_CACHE, huggingface_hub.constants.REPO_ID_SEPARATOR.join( @@ -125,7 +125,7 @@ def list_local_repo_files(repo_id: str, revision: Optional[str]) -> List[str]: return [] -def find_tokenizer_file(files: List[str]): +def find_tokenizer_file(files: list[str]): file_pattern = re.compile( r"^tokenizer\.model\.v.*$|^tekken\.json$|^tokenizer\.mm\.model\.v.*$") @@ -145,10 +145,10 @@ def find_tokenizer_file(files: List[str]): def make_mistral_chat_completion_request( - messages: List["ChatCompletionMessageParam"], - tools: Optional[List[Dict[str, + messages: list["ChatCompletionMessageParam"], + tools: Optional[list[dict[str, Any]]] = None) -> "ChatCompletionRequest": - last_message = cast(Dict[str, Any], messages[-1]) + last_message = cast(dict[str, Any], messages[-1]) if last_message["role"] == "assistant": last_message["prefix"] = True @@ -199,7 +199,7 @@ 
class MistralTokenizer(TokenizerBase): raise TypeError(f"Unsupported tokenizer: {type(tokenizer_)}") self._vocab = tokenizer_.vocab() - # Convert to a Dict[str, int] to match protocol, but this is a lossy + # Convert to a dict[str, int] to match protocol, but this is a lossy # conversion. There may be multiple token ids that decode to the same # string due to partial UTF-8 byte sequences being converted to � self._vocab_dict = { @@ -314,21 +314,21 @@ class MistralTokenizer(TokenizerBase): def __call__( self, - text: Union[str, List[str], List[int]], + text: Union[str, list[str], list[int]], text_pair: Optional[str] = None, add_special_tokens: bool = False, truncation: bool = False, max_length: Optional[int] = None, ): - input_ids: Union[List[int], List[List[int]]] - # For List[str], original prompt text + input_ids: Union[list[int], list[list[int]]] + # For list[str], original prompt text if is_list_of(text, str): - input_ids_: List[List[int]] = [] + input_ids_: list[list[int]] = [] for p in text: each_input_ids = self.encode_one(p, truncation, max_length) input_ids_.append(each_input_ids) input_ids = input_ids_ - # For List[int], apply chat template output, already tokens. + # For list[int], apply chat template output, already tokens. elif is_list_of(text, int): input_ids = text # For str, single prompt text @@ -350,7 +350,7 @@ class MistralTokenizer(TokenizerBase): text: str, truncation: bool = False, max_length: Optional[int] = None, - ) -> List[int]: + ) -> list[int]: # Mistral Tokenizers should not add special tokens input_ids = self.encode(text) @@ -362,7 +362,7 @@ class MistralTokenizer(TokenizerBase): text: str, truncation: Optional[bool] = None, max_length: Optional[int] = None, - add_special_tokens: Optional[bool] = None) -> List[int]: + add_special_tokens: Optional[bool] = None) -> list[int]: # `encode` should only be used for prompt completion # it should never be used for chat_completion. # For chat completion use `apply_chat_template` @@ -374,9 +374,9 @@ class MistralTokenizer(TokenizerBase): return self.tokenizer.encode(text, bos=True, eos=False) def apply_chat_template(self, - messages: List["ChatCompletionMessageParam"], - tools: Optional[List[Dict[str, Any]]] = None, - **kwargs) -> List[int]: + messages: list["ChatCompletionMessageParam"], + tools: Optional[list[dict[str, Any]]] = None, + **kwargs) -> list[int]: request = make_mistral_chat_completion_request(messages, tools) encoded = self.mistral.encode_chat_completion(request) @@ -384,7 +384,7 @@ class MistralTokenizer(TokenizerBase): # encode-decode to get clean prompt return encoded.tokens - def convert_tokens_to_string(self, tokens: List[str]) -> str: + def convert_tokens_to_string(self, tokens: list[str]) -> str: from mistral_common.tokens.tokenizers.base import SpecialTokens if self.is_tekken: tokens = [ @@ -417,7 +417,7 @@ class MistralTokenizer(TokenizerBase): # make sure certain special tokens like Tool calls are # not decoded special_tokens = {SpecialTokens.tool_calls} - regular_tokens: List[str] = [] + regular_tokens: list[str] = [] decoded_list = [] for token in tokens: @@ -442,7 +442,7 @@ class MistralTokenizer(TokenizerBase): # See: guided_decoding/outlines_logits_processors.py::_adapt_tokenizer # for more. 
def decode(self, - ids: Union[List[int], int], + ids: Union[list[int], int], skip_special_tokens: bool = True) -> str: assert ( skip_special_tokens @@ -454,9 +454,9 @@ class MistralTokenizer(TokenizerBase): def convert_ids_to_tokens( self, - ids: List[int], + ids: list[int], skip_special_tokens: bool = True, - ) -> List[str]: + ) -> list[str]: from mistral_common.tokens.tokenizers.base import SpecialTokens # TODO(Patrick) - potentially allow special tokens to not be skipped diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py index 81eb4d9b6abc3..8dff1b612fdbb 100644 --- a/vllm/transformers_utils/utils.py +++ b/vllm/transformers_utils/utils.py @@ -4,7 +4,7 @@ import json from functools import cache from os import PathLike from pathlib import Path -from typing import List, Optional, Union +from typing import Optional, Union from vllm.envs import VLLM_MODEL_REDIRECT_PATH from vllm.logger import init_logger @@ -38,7 +38,7 @@ def modelscope_list_repo_files( repo_id: str, revision: Optional[str] = None, token: Union[str, bool, None] = None, -) -> List[str]: +) -> list[str]: """List files in a modelscope repo.""" from modelscope.hub.api import HubApi api = HubApi()
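
For reference, the pattern applied throughout this patch is the PEP 585 style: the deprecated `typing.Dict`, `List`, `Tuple`, and `Type` aliases are replaced by the builtin `dict`, `list`, `tuple`, and `type` generics, which are valid annotations on Python 3.9+, while `Optional` and `Union` are still imported from `typing`. A minimal sketch of the before/after shape follows; the `lookup` and `first_id` helpers are hypothetical examples, not code from vLLM.

from typing import Optional, Union

# Old style (deprecated since Python 3.9):
#   from typing import Dict, List, Tuple
#   def lookup(vocab: Dict[str, int], tokens: List[str]) -> Tuple[int, ...]: ...

# New style used by this patch: builtin generics (PEP 585).
def lookup(vocab: dict[str, int], tokens: list[str]) -> tuple[int, ...]:
    """Map tokens to ids, defaulting to -1 for unknown tokens."""
    return tuple(vocab.get(t, -1) for t in tokens)

# Optional and Union still come from typing; the patch keeps those imports.
def first_id(ids: Union[list[int], tuple[int, ...]]) -> Optional[int]:
    return ids[0] if ids else None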