Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[V0 Deprecation] Remove V0-only methods in multi-modal registry (#25362)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: yewentao256 <zhyanwentao@126.com>

parent: 1b3aa0f297
commit: 243c358fa8
@@ -209,7 +209,6 @@ def batch_make_video_embeddings(
             return visual(pixel_values_on_device,
                           grid_thw=video_grid_thw_on_device).cpu()
 
-    # V1 Test: this calls a V0 internal.
     video_embeds = torch.concat(llm.apply_model(get_image_embeds))
 
     # split into original batches
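The deleted comment only noted that this test reaches the model through a V0-era pathway; the call itself, llm.apply_model(get_image_embeds), stays. As a point of reference, here is a minimal self-contained sketch of that helper, assuming LLM.apply_model(fn) invokes fn on each worker's loaded model and returns the per-worker results as a list; the checkpoint name and the parameter-counting callback are illustrative, not taken from the test.

    # Sketch only (assumption): LLM.apply_model(fn) calls fn(model) on every
    # worker's underlying torch.nn.Module and returns a list of results.
    from vllm import LLM

    llm = LLM(model="Qwen/Qwen2-VL-2B-Instruct")  # illustrative checkpoint

    def count_params(model):
        # `model` is the loaded torch module; any read-only inspection works.
        return sum(p.numel() for p in model.parameters())

    # One entry per worker; with tensor_parallel_size=1 this is a single int.
    print(llm.apply_model(count_params))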
@@ -12,8 +12,7 @@ from vllm.transformers_utils.tokenizer import (AnyTokenizer,
                                                 cached_tokenizer_from_config)
 from vllm.utils import ClassRegistry
 
-from .cache import (BaseMultiModalProcessorCache,
-                    processor_only_cache_from_config)
+from .cache import BaseMultiModalProcessorCache
 from .processing import BaseMultiModalProcessor, BaseProcessingInfo
 from .profiling import (BaseDummyInputsBuilder, DummyDecoderData,
                         DummyEncoderData, MultiModalProfiler)
@@ -176,35 +175,6 @@ class MultiModalRegistry:
             if mm_limits[key] > 0
         }
 
-    # TODO: Remove once V0 is gone
-    def get_max_tokens_by_modality(
-        self,
-        model_config: "ModelConfig",
-    ) -> Mapping[str, int]:
-        """
-        Get the maximum number of tokens from each modality
-        for profiling the memory usage of a model.
-        """
-        cache = processor_only_cache_from_config(model_config, self)
-        mm_limits = self.get_mm_limits_per_prompt(model_config, cache=cache)
-        max_tokens_per_item = self.get_max_tokens_per_item_by_modality(
-            model_config,
-            cache=cache,
-        )
-
-        return {
-            key: mm_limits[key] * max_tokens_per_mm_item
-            for key, max_tokens_per_mm_item in max_tokens_per_item.items()
-        }
-
-    # TODO: Remove once V0 is gone
-    def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int:
-        """
-        Get the maximum number of multi-modal tokens
-        for profiling the memory usage of a model.
-        """
-        return sum(self.get_max_tokens_by_modality(model_config).values())
-
     def get_mm_limits_per_prompt(
         self,
         model_config: "ModelConfig",
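For code that still needs the deleted helpers, their removed bodies show they were thin wrappers over methods that remain on MultiModalRegistry. A hedged migration sketch follows, assuming access to the global MULTIMODAL_REGISTRY and a vllm.config.ModelConfig instance; the two function names are my own, not a vLLM API.

    from vllm.multimodal import MULTIMODAL_REGISTRY

    def max_tokens_by_modality(model_config):
        # Same arithmetic as the deleted get_max_tokens_by_modality():
        # per-prompt item limit times max tokens per item, per modality.
        mm_limits = MULTIMODAL_REGISTRY.get_mm_limits_per_prompt(model_config)
        per_item = MULTIMODAL_REGISTRY.get_max_tokens_per_item_by_modality(
            model_config)
        return {key: mm_limits[key] * n for key, n in per_item.items()}

    def max_multimodal_tokens(model_config):
        # Same arithmetic as the deleted get_max_multimodal_tokens().
        return sum(max_tokens_by_modality(model_config).values())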