mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 05:34:57 +08:00)
[Deprecation] Remove deprecated args and methods (#21907)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent 3e36fcbee6
commit 9532a6d563
@@ -48,7 +48,7 @@ from vllm.transformers_utils.chat_templates import (
 # yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import deprecate_kwargs, random_uuid
+from vllm.utils import random_uuid

 logger = init_logger(__name__)

@@ -383,17 +383,12 @@ def resolve_mistral_chat_template(
     return None


-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
     model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
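With the decorator and keyword gone, the trust-remote-code flag travels on the ModelConfig instead of being passed per call. A minimal migration sketch (assuming a tokenizer and model_config already exist in the caller's scope):

    # Before: resolve_hf_chat_template(..., trust_remote_code=True)  # warned via deprecate_kwargs
    # After:  the flag is read from model_config.trust_remote_code.
    chat_template = resolve_hf_chat_template(
        tokenizer,
        chat_template=None,  # fall back to the model's bundled template
        tools=None,
        model_config=model_config,
    )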
@@ -488,10 +483,6 @@ def _log_chat_template_content_format(
     )


-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_chat_template_content_format(
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
@@ -499,7 +490,6 @@ def resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
     *,
     model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> _ChatTemplateContentFormat:
     if given_format != "auto":
         return given_format
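The same migration applies to resolve_chat_template_content_format; only the deprecated keyword disappears. A sketch, reusing the parameter names visible in the hunk above:

    content_format = resolve_chat_template_content_format(
        chat_template=None,
        tools=None,
        given_format="auto",        # "auto" lets the function detect the format
        tokenizer=tokenizer,
        model_config=model_config,  # replaces the removed trust_remote_code kwarg
    )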
@@ -568,17 +558,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):

         input_modality = modality.replace("_embeds", "")

-        if mm_registry.has_processor(model_config):
-            mm_processor = mm_registry.create_processor(model_config)
-            allowed_counts = mm_processor.info.get_allowed_mm_limits()
-            allowed_count = allowed_counts.get(input_modality, 0)
-        else:
-            mm_config = model_config.multimodal_config
-            if mm_config is None:
-                msg = "This model does not support multi-modal inputs"
-                raise ValueError(msg)
-
-            allowed_count = mm_config.get_limit_per_prompt(input_modality)
+        mm_processor = mm_registry.create_processor(model_config)
+        allowed_counts = mm_processor.info.get_allowed_mm_limits()
+        allowed_count = allowed_counts.get(input_modality, 0)

         current_count = len(self._items_by_modality[modality]) + 1
         if current_count > allowed_count:
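Because has_processor() (removed later in this commit) always returned True, the fallback through multimodal_config.get_limit_per_prompt() was dead code; per-modality limits now always come from the processor. The surviving lookup in isolation, as a sketch:

    mm_processor = mm_registry.create_processor(model_config)
    allowed_counts = mm_processor.info.get_allowed_mm_limits()
    allowed_count = allowed_counts.get("image", 0)  # e.g. the limit for the image modality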
@@ -1285,10 +1267,6 @@ def parse_chat_messages_futures(
     return conversation, mm_tracker.all_mm_data()


-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def apply_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
@@ -1297,8 +1275,6 @@ def apply_hf_chat_template(
     *,
     model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
-    # Deprecated, explicitly capture here so it doesn't slit into kwargs.
-    trust_remote_code: Optional[bool] = None,
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(

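apply_hf_chat_template follows the same pattern: the deprecated keyword is dropped and remaining **kwargs are forwarded to the Hugging Face template call. A hedged sketch; the chat_template and tools arguments are assumed from the parts of the signature not shown in this hunk:

    prompt = apply_hf_chat_template(
        tokenizer,
        conversation=conversation,
        chat_template=None,
        tools=None,
        model_config=model_config,   # trust_remote_code now lives on the config
        add_generation_prompt=True,  # forwarded to HF via **kwargs
    )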
@@ -5,7 +5,6 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Generic, Optional, Protocol, TypeVar

 import torch.nn as nn
-from typing_extensions import deprecated

 from vllm.envs import VLLM_MM_INPUT_CACHE_GIB
 from vllm.inputs import InputProcessingContext
@@ -105,13 +104,6 @@ class MultiModalRegistry:

         return True  # Success

-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def create_input_mapper(self, model_config: "ModelConfig"):
-        return lambda data, mm_processor_kwargs: data
-
     def get_max_tokens_per_item_by_modality(
         self,
         model_config: "ModelConfig",
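create_input_mapper() had already been reduced to an identity function, so the replacement named in its deprecation message is simply to read the data off the sequence group metadata. A runner-side sketch:

    # Before: mapper = mm_registry.create_input_mapper(model_config); data = mapper(raw, {})
    # After:  consume the already-processed payload directly.
    mm_data = seq_group_metadata.multi_modal_data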
@@ -182,16 +174,6 @@ class MultiModalRegistry:
         """
         return sum(self.get_max_tokens_by_modality(model_config).values())

-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def init_mm_limits_per_prompt(
-        self,
-        model_config: "ModelConfig",
-    ) -> None:
-        pass
-
     def get_mm_limits_per_prompt(
         self,
         model_config: "ModelConfig",
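init_mm_limits_per_prompt() was likewise a no-op, so callers can delete the call outright; limits are read on demand through the method kept just below it. A caller-side sketch:

    mm_limits = mm_registry.get_mm_limits_per_prompt(model_config)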
@@ -246,13 +228,6 @@ class MultiModalRegistry:
         model_cls, _ = get_model_architecture(model_config)
         return model_cls

-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def has_processor(self, model_config: "ModelConfig") -> bool:
-        return True
-
     def create_processor(
         self,
         model_config: "ModelConfig",
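has_processor() unconditionally returned True, so call sites (such as the tracker change earlier in this commit) drop the check and build the processor directly:

    # Before: if mm_registry.has_processor(model_config): ... else: <legacy fallback>
    # After:
    mm_processor = mm_registry.create_processor(model_config)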
@@ -15,8 +15,7 @@ from vllm.lora.request import LoRARequest
 from vllm.model_executor import SamplingMetadata
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.model_executor.model_loader.neuron import get_neuron_model
-from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
-                             MultiModalKwargs)
+from vllm.multimodal import BatchedTensorInputs, MultiModalKwargs
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
@@ -88,10 +87,6 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
         self.device = self.device_config.device
         self.pin_memory = is_pin_memory_available()

-        # Multi-modal data support
-        self.multi_modal_input_mapper = MULTIMODAL_REGISTRY \
-            .create_input_mapper(self.model_config)
-
         # Lazy initialization.
         self.model: nn.Module  # initialize after load_model.

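With the input mapper removed from the Neuron runner's constructor, the multi-modal payload carried on SequenceGroupMetadata is consumed without further processing, as the deprecation message instructs. A hedged sketch of a prepare-input step, assuming MultiModalKwargs.batch and a seq_group_metadata_list as used elsewhere in the runner:

    mm_kwargs_list = [
        sgm.multi_modal_data for sgm in seq_group_metadata_list if sgm.multi_modal_data
    ]
    if mm_kwargs_list:
        batched_mm_inputs = MultiModalKwargs.batch(mm_kwargs_list)  # -> BatchedTensorInputs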