Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-15 03:25:01 +08:00)
[Deprecation] Remove deprecated args and methods (#21907)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 3e36fcbee6, commit 9532a6d563
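
Summary: this commit removes the deprecated trust_remote_code keyword (and the deprecate_kwargs decorators that guarded it) from the chat-template helpers, drops the deprecated legacy input processor/mapper methods from MultiModalRegistry, and deletes the corresponding input-mapper setup in NeuronModelRunner.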
@@ -48,7 +48,7 @@ from vllm.transformers_utils.chat_templates import (
 # yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import deprecate_kwargs, random_uuid
+from vllm.utils import random_uuid
 
 logger = init_logger(__name__)
 
@@ -383,17 +383,12 @@ def resolve_mistral_chat_template(
     return None
 
 
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
     model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
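
Note: a minimal caller migration sketch (illustrative, not part of the commit). It assumes resolve_hf_chat_template is imported from vllm.entrypoints.chat_utils and that a tokenizer and ModelConfig already exist elsewhere; after this change the remote-code flag is read from model_config instead of being passed as a keyword.

from typing import Optional

from vllm.config import ModelConfig
from vllm.entrypoints.chat_utils import resolve_hf_chat_template


def pick_chat_template(tokenizer, model_config: ModelConfig) -> Optional[str]:
    # Before #21907: resolve_hf_chat_template(..., trust_remote_code=True)
    # After #21907: the flag is taken from model_config.trust_remote_code.
    return resolve_hf_chat_template(
        tokenizer,
        chat_template=None,
        tools=None,
        model_config=model_config,
    )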
@@ -488,10 +483,6 @@ def _log_chat_template_content_format(
     )
 
 
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_chat_template_content_format(
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
@@ -499,7 +490,6 @@ def resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
     *,
     model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> _ChatTemplateContentFormat:
     if given_format != "auto":
         return given_format
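
Note: for context, the removed @deprecate_kwargs decorator warned when a caller still passed the named keyword and then forwarded the call unchanged. The sketch below is a generic illustration of that pattern, not vLLM's implementation (which lives in vllm.utils and is untouched by this commit).

import functools
import warnings


def deprecate_kwarg(name: str, additional_message: str = ""):
    """Warn when `name` is passed as a keyword, then call the function as-is."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if name in kwargs:
                warnings.warn(
                    f"The keyword argument '{name}' is deprecated. "
                    f"{additional_message}",
                    DeprecationWarning,
                    stacklevel=2,
                )
            return func(*args, **kwargs)

        return wrapper

    return decorator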
@@ -568,17 +558,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
 
         input_modality = modality.replace("_embeds", "")
 
-        if mm_registry.has_processor(model_config):
-            mm_processor = mm_registry.create_processor(model_config)
-            allowed_counts = mm_processor.info.get_allowed_mm_limits()
-            allowed_count = allowed_counts.get(input_modality, 0)
-        else:
-            mm_config = model_config.multimodal_config
-            if mm_config is None:
-                msg = "This model does not support multi-modal inputs"
-                raise ValueError(msg)
-
-            allowed_count = mm_config.get_limit_per_prompt(input_modality)
+        mm_processor = mm_registry.create_processor(model_config)
+        allowed_counts = mm_processor.info.get_allowed_mm_limits()
+        allowed_count = allowed_counts.get(input_modality, 0)
 
         current_count = len(self._items_by_modality[modality]) + 1
         if current_count > allowed_count:
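
Note: a standalone sketch of the simplified per-prompt limit check that the new code path performs; names are illustrative only, and allowed_counts stands in for what mm_processor.info.get_allowed_mm_limits() returns.

from collections import defaultdict


def check_mm_limit(items_by_modality: dict, modality: str,
                   allowed_counts: dict) -> None:
    # "image_embeds" items share the same per-prompt budget as "image".
    input_modality = modality.replace("_embeds", "")
    allowed_count = allowed_counts.get(input_modality, 0)
    current_count = len(items_by_modality[modality]) + 1
    if current_count > allowed_count:
        raise ValueError(
            f"At most {allowed_count} {input_modality} item(s) are allowed "
            f"per prompt, got {current_count}.")


items = defaultdict(list)
check_mm_limit(items, "image", allowed_counts={"image": 1})  # ok: 1 <= 1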
@@ -1285,10 +1267,6 @@ def parse_chat_messages_futures(
     return conversation, mm_tracker.all_mm_data()
 
 
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def apply_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
@@ -1297,8 +1275,6 @@ def apply_hf_chat_template(
     *,
     model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
-    # Deprecated, explicitly capture here so it doesn't slit into kwargs.
-    trust_remote_code: Optional[bool] = None,
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(
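
Note: the removed comment explains why the deprecated keyword had been captured as an explicit parameter during its deprecation window. A small illustrative sketch (not vLLM code) of that pattern:

def new_api(*, tokenize: bool = False, **hf_kwargs):
    # Anything left in hf_kwargs is forwarded verbatim to the HF tokenizer.
    return hf_kwargs


def old_api(*, tokenize: bool = False, trust_remote_code=None, **hf_kwargs):
    # Naming the deprecated kwarg explicitly keeps it out of hf_kwargs, so it
    # was never forwarded downstream while the deprecation warning was shown.
    return hf_kwargs


assert old_api(trust_remote_code=True) == {}
assert new_api(trust_remote_code=True) == {"trust_remote_code": True}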
@@ -5,7 +5,6 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Generic, Optional, Protocol, TypeVar
 
 import torch.nn as nn
-from typing_extensions import deprecated
 
 from vllm.envs import VLLM_MM_INPUT_CACHE_GIB
 from vllm.inputs import InputProcessingContext
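
Note: the import dropped here is typing_extensions.deprecated (the PEP 702 decorator), which this file no longer needs once its @deprecated methods are gone. For reference, the decorator emits a DeprecationWarning when a decorated function is called, roughly as below (illustrative usage, not code from this commit):

import warnings

from typing_extensions import deprecated


@deprecated("Legacy helper; call the new API instead.")
def old_helper() -> None:
    pass


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    old_helper()

assert any(issubclass(w.category, DeprecationWarning) for w in caught)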
@@ -105,13 +104,6 @@ class MultiModalRegistry:
 
         return True  # Success
 
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def create_input_mapper(self, model_config: "ModelConfig"):
-        return lambda data, mm_processor_kwargs: data
-
     def get_max_tokens_per_item_by_modality(
         self,
         model_config: "ModelConfig",
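
Note: the removed create_input_mapper stub was already a pass-through, so deleting it changes nothing for data that still flowed through it. A tiny check of what the removed line did:

def legacy_input_mapper(data, mm_processor_kwargs):
    # Same behavior as the removed stub: return the data unchanged.
    return data


sample = {"pixel_values": [0.1, 0.2, 0.3]}
assert legacy_input_mapper(sample, mm_processor_kwargs={}) is sample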
@@ -182,16 +174,6 @@ class MultiModalRegistry:
         """
         return sum(self.get_max_tokens_by_modality(model_config).values())
 
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def init_mm_limits_per_prompt(
-        self,
-        model_config: "ModelConfig",
-    ) -> None:
-        pass
-
     def get_mm_limits_per_prompt(
         self,
         model_config: "ModelConfig",
@@ -246,13 +228,6 @@ class MultiModalRegistry:
         model_cls, _ = get_model_architecture(model_config)
         return model_cls
 
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def has_processor(self, model_config: "ModelConfig") -> bool:
-        return True
-
     def create_processor(
         self,
         model_config: "ModelConfig",
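
Note: with has_processor gone (it already returned True unconditionally), callers simply build the processor and query limits directly. A hedged migration sketch, assuming MULTIMODAL_REGISTRY and a ModelConfig as used elsewhere in this diff; default arguments of the registry methods are relied on:

from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY


def build_mm_processor(model_config: ModelConfig):
    # Before: guarded by MULTIMODAL_REGISTRY.has_processor(model_config).
    # After: every model takes the processor path unconditionally.
    processor = MULTIMODAL_REGISTRY.create_processor(model_config)
    limits = MULTIMODAL_REGISTRY.get_mm_limits_per_prompt(model_config)
    return processor, limits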
@@ -15,8 +15,7 @@ from vllm.lora.request import LoRARequest
 from vllm.model_executor import SamplingMetadata
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.model_executor.model_loader.neuron import get_neuron_model
-from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
-                             MultiModalKwargs)
+from vllm.multimodal import BatchedTensorInputs, MultiModalKwargs
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
@@ -88,10 +87,6 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
         self.device = self.device_config.device
         self.pin_memory = is_pin_memory_available()
 
-        # Multi-modal data support
-        self.multi_modal_input_mapper = MULTIMODAL_REGISTRY \
-            .create_input_mapper(self.model_config)
-
         # Lazy initialization.
         self.model: nn.Module  # initialize after load_model.
 