From 82551ad616507de1cda0afb6babeec3f56f189eb Mon Sep 17 00:00:00 2001
From: Cyrus Leung <cyrus.tl.leung@gmail.com>
Date: Fri, 7 Mar 2025 00:03:31 +0800
Subject: [PATCH] [Core] Don't use cache during multi-modal profiling (#14336)

---
 vllm/inputs/registry.py     |  4 +++-
 vllm/multimodal/registry.py | 16 ++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/vllm/inputs/registry.py b/vllm/inputs/registry.py
index 691fcd7dc53f2..babfc4fb809c0 100644
--- a/vllm/inputs/registry.py
+++ b/vllm/inputs/registry.py
@@ -331,7 +331,9 @@ class InputRegistry:
 
         if mm_registry.has_processor(model_config):
             tokenizer = cached_tokenizer_from_config(model_config)
-            processor = mm_registry.create_processor(model_config, tokenizer)
+            processor = mm_registry.create_processor(model_config,
+                                                     tokenizer,
+                                                     disable_cache=True)
             profiler = MultiModalProfiler(processor)
             dummy_data = profiler.get_dummy_data(
                 seq_len, is_encoder_data=is_encoder_data)
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index 1882ffe9bf69f..a9eb250cb877f 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -257,7 +257,9 @@ class MultiModalRegistry:
         """
         if self.has_processor(model_config):
             tokenizer = cached_tokenizer_from_config(model_config)
-            processor = self.create_processor(model_config, tokenizer)
+            processor = self.create_processor(model_config,
+                                              tokenizer,
+                                              disable_cache=True)
             seq_len = model_config.max_model_len
             mm_limits = self.get_mm_limits_per_prompt(model_config)
             return processor.info.get_mm_max_tokens_per_item(
@@ -372,7 +374,9 @@ class MultiModalRegistry:
         """
         if self.has_processor(model_config):
             tokenizer = cached_tokenizer_from_config(model_config)
-            processor = self.create_processor(model_config, tokenizer)
+            processor = self.create_processor(model_config,
+                                              tokenizer,
+                                              disable_cache=True)
             profiler = MultiModalProfiler(processor)
             return profiler.get_mm_limits()
 
@@ -433,6 +437,8 @@ class MultiModalRegistry:
         self,
         model_config: "ModelConfig",
         tokenizer: AnyTokenizer,
+        *,
+        disable_cache: Optional[bool] = None,
     ) -> BaseMultiModalProcessor[BaseProcessingInfo]:
         """
         Create a multi-modal processor for a specific model and tokenizer.
@@ -440,11 +446,13 @@ class MultiModalRegistry:
         See also:
             :ref:`mm-processing`
         """
+        if disable_cache is None:
+            disable_cache = model_config.disable_mm_preprocessor_cache
+
         model_cls = self._get_model_cls(model_config)
         factories = self._processor_factories[model_cls]
 
         ctx = InputProcessingContext(model_config, tokenizer)
-        cache = (None if model_config.disable_mm_preprocessor_cache else
-                 self._processing_cache)
+        cache = None if disable_cache else self._processing_cache
 
         return factories.build_processor(ctx, cache=cache)