mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 05:34:57 +08:00)
[Deprecation] Remove deprecated args and methods (#21907)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent 3e36fcbee6
commit 9532a6d563
@@ -48,7 +48,7 @@ from vllm.transformers_utils.chat_templates import (
 # yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import deprecate_kwargs, random_uuid
+from vllm.utils import random_uuid

 logger = init_logger(__name__)

@@ -383,17 +383,12 @@ def resolve_mistral_chat_template(
     return None


-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
     model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
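With the decorator and keyword gone, the trust-remote-code flag travels on the ModelConfig instead of being passed per call. A minimal migration sketch (assuming a tokenizer and model_config already exist in the caller's scope):

    # Before: resolve_hf_chat_template(..., trust_remote_code=True)  # warned via deprecate_kwargs
    # After:  the flag is read from model_config.trust_remote_code.
    chat_template = resolve_hf_chat_template(
        tokenizer,
        chat_template=None,  # fall back to the model's bundled template
        tools=None,
        model_config=model_config,
    )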
@@ -488,10 +483,6 @@ def _log_chat_template_content_format(
     )


-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_chat_template_content_format(
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
@@ -499,7 +490,6 @@ def resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
     *,
     model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> _ChatTemplateContentFormat:
     if given_format != "auto":
         return given_format
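The same migration applies to resolve_chat_template_content_format; only the deprecated keyword disappears. A sketch, reusing the parameter names visible in the hunk above:

    content_format = resolve_chat_template_content_format(
        chat_template=None,
        tools=None,
        given_format="auto",        # "auto" lets the function detect the format
        tokenizer=tokenizer,
        model_config=model_config,  # replaces the removed trust_remote_code kwarg
    )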
@@ -568,17 +558,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):

         input_modality = modality.replace("_embeds", "")

-        if mm_registry.has_processor(model_config):
-            mm_processor = mm_registry.create_processor(model_config)
-            allowed_counts = mm_processor.info.get_allowed_mm_limits()
-            allowed_count = allowed_counts.get(input_modality, 0)
-        else:
-            mm_config = model_config.multimodal_config
-            if mm_config is None:
-                msg = "This model does not support multi-modal inputs"
-                raise ValueError(msg)
-
-            allowed_count = mm_config.get_limit_per_prompt(input_modality)
+        mm_processor = mm_registry.create_processor(model_config)
+        allowed_counts = mm_processor.info.get_allowed_mm_limits()
+        allowed_count = allowed_counts.get(input_modality, 0)

         current_count = len(self._items_by_modality[modality]) + 1
         if current_count > allowed_count:
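Because has_processor() (removed later in this commit) always returned True, the fallback through multimodal_config.get_limit_per_prompt() was dead code; per-modality limits now always come from the processor. The surviving lookup in isolation, as a sketch:

    mm_processor = mm_registry.create_processor(model_config)
    allowed_counts = mm_processor.info.get_allowed_mm_limits()
    allowed_count = allowed_counts.get("image", 0)  # e.g. the limit for the image modality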
@@ -1285,10 +1267,6 @@ def parse_chat_messages_futures(
     return conversation, mm_tracker.all_mm_data()


-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def apply_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
@@ -1297,8 +1275,6 @@ def apply_hf_chat_template(
     *,
     model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
-    # Deprecated, explicitly capture here so it doesn't slit into kwargs.
-    trust_remote_code: Optional[bool] = None,
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(

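apply_hf_chat_template follows the same pattern: the deprecated keyword is dropped and remaining **kwargs are forwarded to the Hugging Face template call. A hedged sketch; the chat_template and tools arguments are assumed from the parts of the signature not shown in this hunk:

    prompt = apply_hf_chat_template(
        tokenizer,
        conversation=conversation,
        chat_template=None,
        tools=None,
        model_config=model_config,   # trust_remote_code now lives on the config
        add_generation_prompt=True,  # forwarded to HF via **kwargs
    )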
@@ -5,7 +5,6 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Generic, Optional, Protocol, TypeVar

 import torch.nn as nn
-from typing_extensions import deprecated

 from vllm.envs import VLLM_MM_INPUT_CACHE_GIB
 from vllm.inputs import InputProcessingContext
@@ -105,13 +104,6 @@ class MultiModalRegistry:

         return True  # Success

-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def create_input_mapper(self, model_config: "ModelConfig"):
-        return lambda data, mm_processor_kwargs: data
-
     def get_max_tokens_per_item_by_modality(
         self,
         model_config: "ModelConfig",
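create_input_mapper() had already been reduced to an identity function, so the replacement named in its deprecation message is simply to read the data off the sequence group metadata. A runner-side sketch:

    # Before: mapper = mm_registry.create_input_mapper(model_config); data = mapper(raw, {})
    # After:  consume the already-processed payload directly.
    mm_data = seq_group_metadata.multi_modal_data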
@@ -182,16 +174,6 @@ class MultiModalRegistry:
         """
         return sum(self.get_max_tokens_by_modality(model_config).values())

-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def init_mm_limits_per_prompt(
-        self,
-        model_config: "ModelConfig",
-    ) -> None:
-        pass
-
     def get_mm_limits_per_prompt(
         self,
         model_config: "ModelConfig",
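init_mm_limits_per_prompt() was likewise a no-op, so callers can delete the call outright; limits are read on demand through the method kept just below it. A caller-side sketch:

    mm_limits = mm_registry.get_mm_limits_per_prompt(model_config)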
@@ -246,13 +228,6 @@ class MultiModalRegistry:
         model_cls, _ = get_model_architecture(model_config)
         return model_cls

-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def has_processor(self, model_config: "ModelConfig") -> bool:
-        return True
-
     def create_processor(
         self,
         model_config: "ModelConfig",
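has_processor() unconditionally returned True, so call sites (such as the tracker change earlier in this commit) drop the check and build the processor directly:

    # Before: if mm_registry.has_processor(model_config): ... else: <legacy fallback>
    # After:
    mm_processor = mm_registry.create_processor(model_config)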
@@ -15,8 +15,7 @@ from vllm.lora.request import LoRARequest
 from vllm.model_executor import SamplingMetadata
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.model_executor.model_loader.neuron import get_neuron_model
-from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
-                             MultiModalKwargs)
+from vllm.multimodal import BatchedTensorInputs, MultiModalKwargs
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
@@ -88,10 +87,6 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
         self.device = self.device_config.device
         self.pin_memory = is_pin_memory_available()

-        # Multi-modal data support
-        self.multi_modal_input_mapper = MULTIMODAL_REGISTRY \
-            .create_input_mapper(self.model_config)
-
         # Lazy initialization.
         self.model: nn.Module  # initialize after load_model.

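With the input mapper removed from the Neuron runner's constructor, the multi-modal payload carried on SequenceGroupMetadata is consumed without further processing, as the deprecation message instructs. A hedged sketch of a prepare-input step, assuming MultiModalKwargs.batch and a seq_group_metadata_list as used elsewhere in the runner:

    mm_kwargs_list = [
        sgm.multi_modal_data for sgm in seq_group_metadata_list if sgm.multi_modal_data
    ]
    if mm_kwargs_list:
        batched_mm_inputs = MultiModalKwargs.batch(mm_kwargs_list)  # -> BatchedTensorInputs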