Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-15 04:24:57 +08:00)
[Bugfix] Actually disable processing cache when API server is scaled out (#21839)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent: b7b23da4d2
commit: 44bc46da60
@ -140,11 +140,16 @@ def run_multi_api_server(args: argparse.Namespace):
|
|||||||
num_api_servers = args.api_server_count
|
num_api_servers = args.api_server_count
|
||||||
assert num_api_servers > 0
|
assert num_api_servers > 0
|
||||||
|
|
||||||
|
orig_disable_mm_preprocessor_cache = args.disable_mm_preprocessor_cache
|
||||||
|
|
||||||
# set_process_title("ProcManager")
|
# set_process_title("ProcManager")
|
||||||
|
|
||||||
if num_api_servers > 1:
|
if num_api_servers > 1:
|
||||||
setup_multiprocess_prometheus()
|
setup_multiprocess_prometheus()
|
||||||
|
|
||||||
|
# Not compatible with API server scale-out
|
||||||
|
args.disable_mm_preprocessor_cache = True
|
||||||
|
|
||||||
listen_address, sock = setup_server(args)
|
listen_address, sock = setup_server(args)
|
||||||
|
|
||||||
engine_args = vllm.AsyncEngineArgs.from_cli_args(args)
|
engine_args = vllm.AsyncEngineArgs.from_cli_args(args)
|
||||||
@ -161,11 +166,9 @@ def run_multi_api_server(args: argparse.Namespace):
|
|||||||
"with api_server_count > 1")
|
"with api_server_count > 1")
|
||||||
|
|
||||||
if model_config.is_multimodal_model and not (
|
if model_config.is_multimodal_model and not (
|
||||||
model_config.disable_mm_preprocessor_cache):
|
orig_disable_mm_preprocessor_cache):
|
||||||
logger.warning(
|
logger.warning("Multi-model preprocessor cache will be disabled "
|
||||||
"Multi-model preprocessor cache will be disabled for"
|
"for api_server_count > 1")
|
||||||
" api_server_count > 1")
|
|
||||||
model_config.disable_mm_preprocessor_cache = True
|
|
||||||
|
|
||||||
executor_class = Executor.get_class(vllm_config)
|
executor_class = Executor.get_class(vllm_config)
|
||||||
log_stats = not engine_args.disable_log_stats
|
log_stats = not engine_args.disable_log_stats
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user