# SPDX-License-Identifier: Apache-2.0

# The CLI entrypoint to vLLM.
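# Illustrative invocations, assuming the package installs this module's
# main() as the `vllm` console script (the available subcommands come from
# the modules listed in CMD_MODULES below); `<model>` is a placeholder:
#
#     vllm --version
#     vllm serve <model>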
import os
import signal
import sys

import vllm.entrypoints.cli.benchmark.main
import vllm.entrypoints.cli.openai
import vllm.entrypoints.cli.serve
import vllm.version
from vllm.logger import init_logger
from vllm.utils import FlexibleArgumentParser

logger = init_logger(__name__)

CMD_MODULES = [
    vllm.entrypoints.cli.openai,
    vllm.entrypoints.cli.serve,
    vllm.entrypoints.cli.benchmark.main,
]


def register_signal_handlers():

    def signal_handler(sig, frame):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTSTP, signal_handler)


def env_setup():
    # The safest multiprocessing method is `spawn`, as the default `fork` method
    # is not compatible with some accelerators. The default method will be
    # changing in future versions of Python, so we should use it explicitly when
    # possible.
    #
    # We only set it here in the CLI entrypoint, because changing to `spawn`
    # could break some existing code using vLLM as a library. `spawn` will cause
    # unexpected behavior if the code is not protected by
    # `if __name__ == "__main__":`.
    #
    # References:
    # - https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
    # - https://pytorch.org/docs/stable/notes/multiprocessing.html#cuda-in-multiprocessing
    # - https://pytorch.org/docs/stable/multiprocessing.html#sharing-cuda-tensors
    # - https://docs.habana.ai/en/latest/PyTorch/Getting_Started_with_PyTorch_and_Gaudi/Getting_Started_with_PyTorch.html?highlight=multiprocessing#torch-multiprocessing-for-dataloaders
    if "VLLM_WORKER_MULTIPROC_METHOD" not in os.environ:
        logger.debug("Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'")
        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
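
# A minimal sketch of the guard that env_setup()'s comment refers to: code
# using vLLM as a library (here via the `LLM` class, as an assumed example)
# must protect its entrypoint, because `spawn` re-imports the main module in
# each worker process:
#
#     from vllm import LLM
#
#     if __name__ == "__main__":
#         llm = LLM(model="facebook/opt-125m")  # example model, not mandated
#         print(llm.generate(["Hello, my name is"]))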


def main():
    env_setup()

    parser = FlexibleArgumentParser(description="vLLM CLI")
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=vllm.version.__version__)
    subparsers = parser.add_subparsers(required=False, dest="subparser")
    cmds = {}
    for cmd_module in CMD_MODULES:
        new_cmds = cmd_module.cmd_init()
        for cmd in new_cmds:
            cmd.subparser_init(subparsers).set_defaults(
                dispatch_function=cmd.cmd)
            cmds[cmd.name] = cmd
    args = parser.parse_args()
    if args.subparser in cmds:
        cmds[args.subparser].validate(args)

    if hasattr(args, "dispatch_function"):
        args.dispatch_function(args)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
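
# A hedged sketch of the command interface that main() assumes from each
# module in CMD_MODULES: a module-level cmd_init() returning objects that
# expose `name`, `subparser_init(subparsers)`, `cmd(args)`, and
# `validate(args)`. The names below (HelloCommand, "hello", --who) are
# hypothetical and only illustrate that shape:
#
#     class HelloCommand:
#         name = "hello"
#
#         @staticmethod
#         def cmd(args):
#             print(f"Hello, {args.who}!")
#
#         @staticmethod
#         def validate(args):
#             pass
#
#         def subparser_init(self, subparsers):
#             parser = subparsers.add_parser("hello")
#             parser.add_argument("--who", default="world")
#             return parser
#
#     def cmd_init():
#         return [HelloCommand()]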