diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py
index 2cf42d57ec21..ff1ac0e18f32 100644
--- a/vllm/config/scheduler.py
+++ b/vllm/config/scheduler.py
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast
 
 from pydantic import Field, field_validator
 from pydantic.dataclasses import dataclass
-from typing_extensions import Self, deprecated
+from typing_extensions import Self
 
 from vllm.config.utils import config
 from vllm.logger import init_logger
@@ -224,19 +224,6 @@ class SchedulerConfig:
 
         self.verify_max_model_len(max_model_len)
 
-    @property
-    @deprecated(
-        "`SchedulerConfig.chunked_prefill_enabled` has been renamed to "
-        "`SchedulerConfig.enable_chunked_prefill`. "
-        "The old name will be removed in v0.12."
-    )
-    def chunked_prefill_enabled(self) -> bool:
-        return self.enable_chunked_prefill
-
-    @chunked_prefill_enabled.setter
-    def chunked_prefill_enabled(self, value: bool):
-        self.enable_chunked_prefill = value
-
     def verify_max_model_len(self, max_model_len: int) -> Self:
         if (
             self.max_num_batched_tokens < max_model_len
diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 69c28e278f2d..52b433cfaf1b 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -41,7 +41,6 @@ import torch.distributed
 import torch.distributed._functional_collectives as funcol
 import torch.distributed._symmetric_memory
 from torch.distributed import Backend, ProcessGroup
-from typing_extensions import deprecated
 
 import vllm.envs as envs
 from vllm.distributed.device_communicators.base_device_communicator import (
@@ -1078,15 +1077,6 @@ def get_tp_group() -> GroupCoordinator:
     return _TP
 
 
-@deprecated(
-    "`get_tensor_model_parallel_group` has been replaced with "
-    "`get_tp_group` and may be removed after v0.12. Please use "
-    "`get_tp_group` instead."
-)
-def get_tensor_model_parallel_group():
-    return get_tp_group()
-
-
 _DCP: GroupCoordinator | None = None
 
 
@@ -1130,15 +1120,6 @@ def get_pcp_group() -> GroupCoordinator:
     return _PCP
 
 
-@deprecated(
-    "`get_pipeline_model_parallel_group` has been replaced with "
-    "`get_pp_group` and may be removed in v0.12. Please use "
-    "`get_pp_group` instead."
-)
-def get_pipeline_model_parallel_group():
-    return get_pp_group()
-
-
 @contextmanager
 def graph_capture(device: torch.device):
     """
diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py
index ccefd7e66697..f25ab9153a50 100644
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -10,7 +10,6 @@ import torch
 import torch.nn as nn
 from torch.func import functional_call
 from transformers import PretrainedConfig
-from typing_extensions import deprecated
 
 from vllm.config import VllmConfig
 from vllm.distributed import (
@@ -481,54 +480,6 @@
     return inputs_embeds
 
 
-@deprecated(
-    "`merge_multimodal_embeddings` has been replaced with "
-    "`SupportsMultiModal.embed_input_ids` and will be "
-    "removed in v0.12."
-)
-def merge_multimodal_embeddings(
-    input_ids: torch.Tensor,
-    inputs_embeds: torch.Tensor,
-    multimodal_embeddings: NestedTensors,
-    placeholder_token_id: int | list[int],
-) -> torch.Tensor:
-    """
-    Merge `multimodal_embeddings` into `inputs_embeds` by overwriting the
-    positions in `inputs_embeds` corresponding to placeholder tokens in
-    `input_ids`.
-
-    `placeholder_token_id` can be a list of token ids (e.g, token ids
-    of img_start, img_break, and img_end tokens) when needed: This means
-    the order of these tokens in the `input_ids` MUST MATCH the order of
-    their embeddings in `multimodal_embeddings` since we need to
-    slice-merge instead of individually scattering.
-
-    For example, if input_ids is "TTTTTSIIIBIIIBIIIETTT", where
-    - T is text token
-    - S is image start token
-    - I is image embedding token
-    - B is image break token
-    - E is image end token.
-
-    Then the image embeddings (that correspond to I's) from vision encoder
-    must be padded with embeddings of S, B, and E in the same order of
-    input_ids for a correct embedding merge.
-
-    Note:
-        This updates `inputs_embeds` in place.
-    """
-    if isinstance(placeholder_token_id, list):
-        is_multimodal = isin_list(input_ids, placeholder_token_id)
-    else:
-        is_multimodal = input_ids == placeholder_token_id
-
-    return _merge_multimodal_embeddings(
-        inputs_embeds,
-        multimodal_embeddings=multimodal_embeddings,
-        is_multimodal=is_multimodal,
-    )
-
-
 def isin_list(
     elements: torch.Tensor,
     test_elements_list: list[int],
diff --git a/vllm/v1/core/sched/output.py b/vllm/v1/core/sched/output.py
index 7902513dce49..abfab43499b2 100644
--- a/vllm/v1/core/sched/output.py
+++ b/vllm/v1/core/sched/output.py
@@ -126,12 +126,12 @@ class CachedRequestData:
         return len(self.req_ids)
 
     @cached_property
-    @deprecated("use resumed_req_ids field")
+    @deprecated("This will be removed in v0.14, use `resumed_req_ids` instead.")
    def resumed_from_preemption(self) -> list[bool]:
         return [req_id in self.resumed_req_ids for req_id in self.req_ids]
 
     @cached_property
-    @deprecated("use all_token_ids field")
+    @deprecated("This will be removed in v0.14, use `all_token_ids` instead.")
     def resumed_req_token_ids(self) -> list[list[int] | None]:
         return [
             self.all_token_ids[req_id] if req_id in self.resumed_req_ids else None
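Migration note for downstream callers of the removed aliases. The sketch below is hypothetical: the wrapper function name and the `scheduler_config` argument are illustrative only, and only the replacement names (`get_tp_group`, `get_pp_group`, `SchedulerConfig.enable_chunked_prefill`, `SupportsMultiModal.embed_input_ids`) come from the deprecation messages deleted above.

    from vllm.config import SchedulerConfig
    from vllm.distributed.parallel_state import get_pp_group, get_tp_group


    def migrated_call_sites(scheduler_config: SchedulerConfig) -> None:
        # The removed wrappers simply forwarded to these accessors, so the
        # returned GroupCoordinator objects are the same as before.
        # (Calling them requires vLLM's distributed environment to be initialized.)
        tp_group = get_tp_group()  # was get_tensor_model_parallel_group()
        pp_group = get_pp_group()  # was get_pipeline_model_parallel_group()

        # The removed property was a read/write alias for this field.
        if scheduler_config.enable_chunked_prefill:  # was .chunked_prefill_enabled
            print(tp_group.world_size, pp_group.world_size)

Multimodal callers should use the model's `SupportsMultiModal.embed_input_ids` in place of the removed free function `merge_multimodal_embeddings`, per the message in the deleted decorator; its signature is not reproduced here.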