mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-25 21:35:19 +08:00
preload heavy modules when mp method is forkserver (#22214)
Signed-off-by: Lionel Villard <villard@us.ibm.com>
This commit is contained in:
parent
14bcf93a6a
commit
ad6c655dde
@ -13,7 +13,6 @@ import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm import LLM, SamplingParams
|
||||
from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
|
||||
write_to_json)
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
@ -85,6 +84,9 @@ def main(args: argparse.Namespace):
|
||||
"Please set it to a valid path to use torch profiler.")
|
||||
engine_args = EngineArgs.from_cli_args(args)
|
||||
|
||||
# Lazy import to avoid importing LLM when the bench command is not selected.
|
||||
from vllm import LLM, SamplingParams
|
||||
|
||||
# NOTE(woosuk): If the request cannot be processed in a single batch,
|
||||
# the engine will automatically process the request in multiple batches.
|
||||
llm = LLM(**dataclasses.asdict(engine_args))
|
||||
|
||||
@ -8,6 +8,7 @@ import importlib
|
||||
import inspect
|
||||
import json
|
||||
import multiprocessing
|
||||
import multiprocessing.forkserver as forkserver
|
||||
import os
|
||||
import signal
|
||||
import socket
|
||||
@ -155,6 +156,15 @@ async def build_async_engine_client(
|
||||
client_config: Optional[dict[str, Any]] = None,
|
||||
) -> AsyncIterator[EngineClient]:
|
||||
|
||||
if os.getenv("VLLM_WORKER_MULTIPROC_METHOD") == "forkserver":
|
||||
# The executor is expected to be the multiprocessing (mp) executor.
|
||||
# Pre-import heavy modules in the forkserver process
|
||||
logger.debug("Setup forkserver with pre-imports")
|
||||
multiprocessing.set_start_method('forkserver')
|
||||
multiprocessing.set_forkserver_preload(["vllm.v1.engine.async_llm"])
|
||||
forkserver.ensure_running()
|
||||
logger.debug("Forkserver setup complete!")
|
||||
|
||||
# Context manager to handle engine_client lifecycle
|
||||
# Ensures everything is shut down and cleaned up on error/exit
|
||||
engine_args = AsyncEngineArgs.from_cli_args(args)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user