[Bugfix] Disable multi-modal preprocessor cache for DP (#21896)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-07-31 18:30:02 +08:00 · 2025-08-01 15:03:56 +08:00 · 2025-08-01 15:03:56 +08:00 · b4e081cb15
commit b4e081cb15
parent 79731a79f0
3 changed files with 21 additions and 2 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -871,6 +871,12 @@ class ModelConfig:
        return None
    def set_disable_mm_preprocessor_cache(self, value: bool) -> None:
        """Set ``disable_mm_preprocessor_cache`` on both config objects.

        The multimodal config is fetched via ``get_multimodal_config()``
        before anything is written, so if that call raises, neither object
        is modified.

        Args:
            value: New value of the flag, written to this config and to the
                multimodal config it returns.
        """
        multimodal_config = self.get_multimodal_config()
        # Write this config first, then the multimodal config, so the two
        # copies of the flag never disagree after the call returns.
        for target in (self, multimodal_config):
            target.disable_mm_preprocessor_cache = value
    def _get_encoder_config(self):
        """Return the helper's lookup result for this model and revision.

        Delegates to ``get_sentence_transformer_tokenizer_config`` with
        ``self.model`` and ``self.revision`` and returns its result unchanged.
        """
        return get_sentence_transformer_tokenizer_config(
            self.model, self.revision)
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1197,6 +1197,18 @@ class EngineArgs:
            enable_multimodal_encoder_data_parallel,
        )
        supports_mm_preprocessor_cache = (self.data_parallel_size == 1
                                          or data_parallel_external_lb)
        if (not supports_mm_preprocessor_cache
                and model_config.is_multimodal_model
                and not model_config.disable_mm_preprocessor_cache):
            logger.warning(
                "Multi-modal preprocessor cache is not compatible "
                "with data parallelism when there does not exist a "
                "one-to-one correspondance between API process and "
                "EngineCore process, so the cache will be disabled.")
            model_config.set_disable_mm_preprocessor_cache(True)
        speculative_config = self.create_speculative_config(
            target_model_config=model_config,
            target_parallel_config=parallel_config,
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -167,8 +167,9 @@ def run_multi_api_server(args: argparse.Namespace):
        if model_config.is_multimodal_model and not (
                orig_disable_mm_preprocessor_cache):
-            logger.warning("Multi-model preprocessor cache will be disabled "
+            logger.warning(
-                           "for api_server_count > 1")
+                "Multi-modal preprocessor cache is not compatible "
                "with api_server_count > 1, so the cache will be disabled.")
    executor_class = Executor.get_class(vllm_config)
    log_stats = not engine_args.disable_log_stats