[Deprecation] Remove deprecated args and methods (#21907)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Authored by Cyrus Leung on 2025-07-31 14:46:38 +08:00; committed by GitHub
parent 3e36fcbee6
commit 9532a6d563
3 changed files with 5 additions and 59 deletions

View File

@@ -48,7 +48,7 @@ from vllm.transformers_utils.chat_templates import (
# yapf: enable
from vllm.transformers_utils.processor import cached_get_processor
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
from vllm.utils import deprecate_kwargs, random_uuid
from vllm.utils import random_uuid
logger = init_logger(__name__)
@@ -383,17 +383,12 @@ def resolve_mistral_chat_template(
return None
@deprecate_kwargs(
"trust_remote_code",
additional_message="Please use `model_config.trust_remote_code` instead.",
)
def resolve_hf_chat_template(
tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
chat_template: Optional[str],
tools: Optional[list[dict[str, Any]]],
*,
model_config: ModelConfig,
trust_remote_code: Optional[bool] = None,
) -> Optional[str]:
# 1st priority: The given chat template
if chat_template is not None:
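A hedged migration sketch for callers of resolve_hf_chat_template (not part of this diff): the removed trust_remote_code keyword is now read from the ModelConfig that callers already pass. The helper name and argument values below are illustrative.

from typing import Optional, Union

from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

from vllm.config import ModelConfig
from vllm.entrypoints.chat_utils import resolve_hf_chat_template


def pick_chat_template(
    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
    model_config: ModelConfig,
) -> Optional[str]:
    # Before this commit, passing trust_remote_code=... here only raised a
    # deprecation warning; now the flag is taken from
    # model_config.trust_remote_code and the keyword no longer exists.
    return resolve_hf_chat_template(
        tokenizer,
        chat_template=None,  # fall back to the tokenizer's own template
        tools=None,
        model_config=model_config,
    )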
@@ -488,10 +483,6 @@ def _log_chat_template_content_format(
)
@deprecate_kwargs(
"trust_remote_code",
additional_message="Please use `model_config.trust_remote_code` instead.",
)
def resolve_chat_template_content_format(
chat_template: Optional[str],
tools: Optional[list[dict[str, Any]]],
@@ -499,7 +490,6 @@ def resolve_chat_template_content_format(
tokenizer: AnyTokenizer,
*,
model_config: ModelConfig,
trust_remote_code: Optional[bool] = None,
) -> _ChatTemplateContentFormat:
if given_format != "auto":
return given_format
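The same migration applies to resolve_chat_template_content_format; a hedged sketch, assuming tokenizer and model_config from the surrounding code and a given_format positional parameter that this hunk elides:

content_format = resolve_chat_template_content_format(
    chat_template=None,
    tools=None,
    given_format="auto",  # name assumed; "auto" lets vLLM detect the format
    tokenizer=tokenizer,
    model_config=model_config,
)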
@@ -568,17 +558,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
input_modality = modality.replace("_embeds", "")
if mm_registry.has_processor(model_config):
mm_processor = mm_registry.create_processor(model_config)
allowed_counts = mm_processor.info.get_allowed_mm_limits()
allowed_count = allowed_counts.get(input_modality, 0)
else:
mm_config = model_config.multimodal_config
if mm_config is None:
msg = "This model does not support multi-modal inputs"
raise ValueError(msg)
allowed_count = mm_config.get_limit_per_prompt(input_modality)
mm_processor = mm_registry.create_processor(model_config)
allowed_counts = mm_processor.info.get_allowed_mm_limits()
allowed_count = allowed_counts.get(input_modality, 0)
current_count = len(self._items_by_modality[modality]) + 1
if current_count > allowed_count:
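A hedged sketch of the simplified limit lookup above: with the legacy fallback gone, every model goes through the multi-modal processor, so the per-modality limits always come from get_allowed_mm_limits(). model_config is assumed to describe a multi-modal model.

from vllm.multimodal import MULTIMODAL_REGISTRY

mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)
allowed_counts = mm_processor.info.get_allowed_mm_limits()

# e.g. how many images a single prompt may contain (0 if unsupported)
allowed_images = allowed_counts.get("image", 0)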
@@ -1285,10 +1267,6 @@ def parse_chat_messages_futures(
return conversation, mm_tracker.all_mm_data()
@deprecate_kwargs(
"trust_remote_code",
additional_message="Please use `model_config.trust_remote_code` instead.",
)
def apply_hf_chat_template(
tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
conversation: list[ConversationMessage],
@@ -1297,8 +1275,6 @@ def apply_hf_chat_template(
*,
model_config: ModelConfig,
tokenize: bool = False, # Different from HF's default
# Deprecated, explicitly capture here so it doesn't slip into kwargs.
trust_remote_code: Optional[bool] = None,
**kwargs: Any,
) -> str:
hf_chat_template = resolve_hf_chat_template(
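For completeness, a hedged sketch of an apply_hf_chat_template call after this change; chat_template and tools are positional parameters elided by the hunk, and extra keywords are still forwarded to the underlying HF apply_chat_template via **kwargs.

prompt = apply_hf_chat_template(
    tokenizer,
    conversation=conversation,  # list[ConversationMessage] built by the caller
    chat_template=None,
    tools=None,
    model_config=model_config,  # trust_remote_code is now read from here
    add_generation_prompt=True,  # forwarded through **kwargs (illustrative)
)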

View File

@@ -5,7 +5,6 @@ from dataclasses import dataclass
from typing import TYPE_CHECKING, Generic, Optional, Protocol, TypeVar
import torch.nn as nn
from typing_extensions import deprecated
from vllm.envs import VLLM_MM_INPUT_CACHE_GIB
from vllm.inputs import InputProcessingContext
@@ -105,13 +104,6 @@ class MultiModalRegistry:
return True # Success
@deprecated("Legacy input processor/mapper pipeline has been removed. "
"Please update your model runner to use "
"`seq_group_metadata.multi_modal_data` directly without "
"further processing.")
def create_input_mapper(self, model_config: "ModelConfig"):
return lambda data, mm_processor_kwargs: data
def get_max_tokens_per_item_by_modality(
self,
model_config: "ModelConfig",
@@ -182,16 +174,6 @@ class MultiModalRegistry:
"""
return sum(self.get_max_tokens_by_modality(model_config).values())
@deprecated("Legacy input processor/mapper pipeline has been removed. "
"Please update your model runner to use "
"`seq_group_metadata.multi_modal_data` directly without "
"further processing.")
def init_mm_limits_per_prompt(
self,
model_config: "ModelConfig",
) -> None:
pass
def get_mm_limits_per_prompt(
self,
model_config: "ModelConfig",
@@ -246,13 +228,6 @@ class MultiModalRegistry:
model_cls, _ = get_model_architecture(model_config)
return model_cls
@deprecated("Legacy input processor/mapper pipeline has been removed. "
"Please update your model runner to use "
"`seq_group_metadata.multi_modal_data` directly without "
"further processing.")
def has_processor(self, model_config: "ModelConfig") -> bool:
return True
def create_processor(
self,
model_config: "ModelConfig",
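A hedged before/after note for code that still called the three removed shims (has_processor, create_input_mapper, init_mm_limits_per_prompt): every model is now handled by the processor-based pipeline, so the guard, the mapper, and the limits initialisation simply disappear. The sketch below uses illustrative names.

from vllm.multimodal import MULTIMODAL_REGISTRY

# Before (deprecated): guard on registry.has_processor(model_config), build an
# input mapper, and call init_mm_limits_per_prompt() at runner start-up.
# After: create the processor directly; limits come from its info object and
# the multi-modal data needs no further mapping.
mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)
mm_limits = mm_processor.info.get_allowed_mm_limits()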

View File

@@ -15,8 +15,7 @@ from vllm.lora.request import LoRARequest
from vllm.model_executor import SamplingMetadata
from vllm.model_executor.layers.sampler import SamplerOutput
from vllm.model_executor.model_loader.neuron import get_neuron_model
from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
MultiModalKwargs)
from vllm.multimodal import BatchedTensorInputs, MultiModalKwargs
from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
@@ -88,10 +87,6 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
self.device = self.device_config.device
self.pin_memory = is_pin_memory_available()
# Multi-modal data support
self.multi_modal_input_mapper = MULTIMODAL_REGISTRY \
.create_input_mapper(self.model_config)
# Lazy initialization.
self.model: nn.Module # initialize after load_model.
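Finally, a hedged sketch of what a model runner does instead of the removed input mapper, following the deprecation message quoted above: seq_group_metadata.multi_modal_data already holds processed MultiModalKwargs and only needs batching. The helper below is illustrative, not part of vllm.

from vllm.multimodal import BatchedTensorInputs, MultiModalKwargs
from vllm.sequence import SequenceGroupMetadata


def collect_mm_kwargs(
    seq_group_metadata_list: list[SequenceGroupMetadata],
) -> BatchedTensorInputs:
    mm_kwargs_list: list[MultiModalKwargs] = []
    for seq_group_metadata in seq_group_metadata_list:
        mm_data = seq_group_metadata.multi_modal_data
        if mm_data:
            # Already processed by the multi-modal processor; no mapper call.
            mm_kwargs_list.append(mm_data)
    return MultiModalKwargs.batch(mm_kwargs_list)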