From b4e081cb150797b12039cc1232205dbb25ca0206 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 1 Aug 2025 15:03:56 +0800 Subject: [PATCH] [Bugfix] Disable multi-modal preprocessor cache for DP (#21896) Signed-off-by: DarkLight1337 --- vllm/config.py | 6 ++++++ vllm/engine/arg_utils.py | 12 ++++++++++++ vllm/entrypoints/cli/serve.py | 5 +++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 9d5739ca11efd..93daab7d6ae97 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -871,6 +871,12 @@ class ModelConfig: return None + def set_disable_mm_preprocessor_cache(self, value: bool) -> None: + mm_config = self.get_multimodal_config() + + self.disable_mm_preprocessor_cache = value + mm_config.disable_mm_preprocessor_cache = value + def _get_encoder_config(self): return get_sentence_transformer_tokenizer_config( self.model, self.revision) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index c36c79c69317e..78272d983eaf5 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1197,6 +1197,18 @@ class EngineArgs: enable_multimodal_encoder_data_parallel, ) + supports_mm_preprocessor_cache = (self.data_parallel_size == 1 + or data_parallel_external_lb) + if (not supports_mm_preprocessor_cache + and model_config.is_multimodal_model + and not model_config.disable_mm_preprocessor_cache): + logger.warning( + "Multi-modal preprocessor cache is not compatible " + "with data parallelism when there does not exist a " + "one-to-one correspondence between API process and " + "EngineCore process, so the cache will be disabled.") + model_config.set_disable_mm_preprocessor_cache(True) + speculative_config = self.create_speculative_config( target_model_config=model_config, target_parallel_config=parallel_config, diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py index 7dcba2cccdb52..bdbe71b832f4f 100644 --- a/vllm/entrypoints/cli/serve.py +++ b/vllm/entrypoints/cli/serve.py @@ 
-167,8 +167,9 @@ def run_multi_api_server(args: argparse.Namespace): if model_config.is_multimodal_model and not ( orig_disable_mm_preprocessor_cache): - logger.warning("Multi-model preprocessor cache will be disabled " - "for api_server_count > 1") + logger.warning( + "Multi-modal preprocessor cache is not compatible " + "with api_server_count > 1, so the cache will be disabled.") executor_class = Executor.get_class(vllm_config) log_stats = not engine_args.disable_log_stats