Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 03:35:17 +08:00)
chore: remove unnecessary patch_padding_side for the chatglm model (#23090)
Signed-off-by: carlory <baofa.fan@daocloud.io>
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent: d3f71f1224
commit: 569aefd134
@@ -19,7 +19,6 @@ from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
 from transformers.video_utils import VideoMetadata
 
 from vllm.sequence import SampleLogprobs
-from vllm.transformers_utils.tokenizer import patch_padding_side
 from vllm.utils import is_list_of
 
 from .....conftest import HfRunner, ImageAsset, ImageTestAssets
@@ -343,7 +342,6 @@ def gemma3_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
 def glm4v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
     """Patches and returns an instance of the HfRunner to use for GLM4V."""
     hf_processor = hf_model.processor
-    patch_padding_side(hf_processor)
 
     def processor(*args, text="", images=None, **kwargs):
         if images is None:
@@ -7,7 +7,6 @@ import os
 import warnings
 from functools import lru_cache
 from pathlib import Path
-from types import MethodType
 from typing import TYPE_CHECKING, Any, Optional, Union
 
 import huggingface_hub
@@ -144,26 +143,6 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
     return cached_tokenizer
 
 
-def patch_padding_side(tokenizer: PreTrainedTokenizer) -> None:
-    """Patch _pad method to accept `padding_side` for older tokenizers."""
-    orig_pad = tokenizer._pad
-
-    def _pad(
-        self: PreTrainedTokenizer,
-        *args,
-        padding_side: Optional[str] = None,
-        **kwargs,
-    ):
-        if padding_side is not None and padding_side != self.padding_side:
-            msg = ("`padding_side` argument is not supported by "
-                   f"{type(tokenizer).__name__} and will be ignored.")
-            warnings.warn(msg, stacklevel=2)
-
-        return orig_pad(*args, **kwargs)
-
-    tokenizer._pad = MethodType(_pad, tokenizer)
-
-
 def get_tokenizer(
     tokenizer_name: Union[str, Path],
     *args,
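
For context, the helper deleted above relied on instance-level monkey-patching via types.MethodType: it captured the bound _pad, wrapped it so an unexpected `padding_side` keyword is accepted (with a warning) and discarded, and rebound the wrapper onto that single tokenizer instance. A minimal standalone sketch of the same technique; the Toy class and all values are hypothetical, not vLLM code:

from types import MethodType

class Toy:
    padding_side = "right"

    def _pad(self, seq: list[int], length: int) -> list[int]:
        # Original method: it does not know about `padding_side`.
        return seq + [0] * (length - len(seq))

tok = Toy()
orig_pad = tok._pad  # bound method, already carries `tok` as self

def _pad(self, *args, padding_side=None, **kwargs):
    # Accept and discard the keyword the original method cannot handle.
    if padding_side is not None and padding_side != self.padding_side:
        print(f"ignoring unsupported padding_side={padding_side!r}")
    return orig_pad(*args, **kwargs)

tok._pad = MethodType(_pad, tok)  # rebind on this one instance only

print(tok._pad([1, 2], 4, padding_side="left"))  # -> [1, 2, 0, 0]

Because the wrapper is bound with MethodType, only the patched instance changes behavior; the class and every other instance keep their original _pad.
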
@@ -271,12 +250,6 @@ def get_tokenizer(
     }
     tokenizer.add_special_tokens(special_tokens_map)
 
-    # NOTE: We can remove this after https://github.com/zai-org/ChatGLM3/issues/1324
-    if type(tokenizer).__name__ in ("ChatGLMTokenizer",
-                                    "ChatGLM4Tokenizer"):
-        assert isinstance(tokenizer, PreTrainedTokenizer)
-        patch_padding_side(tokenizer)
-
     if not isinstance(tokenizer, PreTrainedTokenizerFast):
         logger.warning(
             "Using a slow tokenizer. This might cause a significant "
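
The ChatGLM special case can go away because there is nothing left to shim: per the removed NOTE, the upstream tokenizer issue (zai-org/ChatGLM3#1324) has been resolved, and recent transformers releases pass `padding_side` through pad()/_pad() natively. A short usage sketch, under the assumption that the installed transformers exposes `padding_side` on pad() (true in recent releases); bert-base-uncased is only a stand-in checkpoint for illustration:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")
batch = tok(["hello", "a longer example sentence"])
# `padding_side` is accepted per call here, so no per-instance
# _pad patch is needed for tokenizers that follow this interface.
padded = tok.pad(batch, padding=True, padding_side="left", return_tensors="pt")
print(padded["input_ids"])  # pad tokens appear on the left
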