mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-19 08:37:00 +08:00
[Bugfix] Disable multi-modal preprocessor cache for DP (#21896)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
79731a79f0
commit
b4e081cb15
@ -871,6 +871,12 @@ class ModelConfig:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def set_disable_mm_preprocessor_cache(self, value: bool) -> None:
    """Set ``disable_mm_preprocessor_cache`` on this config and its
    multi-modal config.

    The multi-modal config is fetched via ``get_multimodal_config()``
    before anything is written, so neither object is modified if that
    lookup fails.

    Args:
        value: New value stored in ``disable_mm_preprocessor_cache`` on
            both objects.
    """
    multimodal_config = self.get_multimodal_config()

    # Keep both copies of the flag in sync: this config first, then the
    # multi-modal config.
    for target in (self, multimodal_config):
        target.disable_mm_preprocessor_cache = value
|
||||||
|
|
||||||
def _get_encoder_config(self):
|
def _get_encoder_config(self):
|
||||||
return get_sentence_transformer_tokenizer_config(
|
return get_sentence_transformer_tokenizer_config(
|
||||||
self.model, self.revision)
|
self.model, self.revision)
|
||||||
|
|||||||
@ -1197,6 +1197,18 @@ class EngineArgs:
|
|||||||
enable_multimodal_encoder_data_parallel,
|
enable_multimodal_encoder_data_parallel,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
supports_mm_preprocessor_cache = (self.data_parallel_size == 1
|
||||||
|
or data_parallel_external_lb)
|
||||||
|
if (not supports_mm_preprocessor_cache
|
||||||
|
and model_config.is_multimodal_model
|
||||||
|
and not model_config.disable_mm_preprocessor_cache):
|
||||||
|
logger.warning(
|
||||||
|
"Multi-modal preprocessor cache is not compatible "
|
||||||
|
"with data parallelism when there does not exist a "
|
||||||
|
"one-to-one correspondance between API process and "
|
||||||
|
"EngineCore process, so the cache will be disabled.")
|
||||||
|
model_config.set_disable_mm_preprocessor_cache(True)
|
||||||
|
|
||||||
speculative_config = self.create_speculative_config(
|
speculative_config = self.create_speculative_config(
|
||||||
target_model_config=model_config,
|
target_model_config=model_config,
|
||||||
target_parallel_config=parallel_config,
|
target_parallel_config=parallel_config,
|
||||||
|
|||||||
@ -167,8 +167,9 @@ def run_multi_api_server(args: argparse.Namespace):
|
|||||||
|
|
||||||
if model_config.is_multimodal_model and not (
|
if model_config.is_multimodal_model and not (
|
||||||
orig_disable_mm_preprocessor_cache):
|
orig_disable_mm_preprocessor_cache):
|
||||||
logger.warning("Multi-model preprocessor cache will be disabled "
|
logger.warning(
|
||||||
"for api_server_count > 1")
|
"Multi-modal preprocessor cache is not compatible "
|
||||||
|
"with api_server_count > 1, so the cache will be disabled.")
|
||||||
|
|
||||||
executor_class = Executor.get_class(vllm_config)
|
executor_class = Executor.get_class(vllm_config)
|
||||||
log_stats = not engine_args.disable_log_stats
|
log_stats = not engine_args.disable_log_stats
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user