mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 22:57:09 +08:00
Add full serve CLI reference back to docs (#20978)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
1e36c8687e
commit
b637e9dcb8
@ -1,3 +1,7 @@
|
|||||||
|
---
|
||||||
|
toc_depth: 4
|
||||||
|
---
|
||||||
|
|
||||||
# vLLM CLI Guide
|
# vLLM CLI Guide
|
||||||
|
|
||||||
The vllm command-line tool is used to run and manage vLLM models. You can start by viewing the help message with:
|
The vllm command-line tool is used to run and manage vLLM models. You can start by viewing the help message with:
|
||||||
@ -42,6 +46,10 @@ Start the vLLM OpenAI Compatible API server.
|
|||||||
vllm serve --help=page
|
vllm serve --help=page
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Options
|
||||||
|
|
||||||
|
--8<-- "docs/argparse/serve.md"
|
||||||
|
|
||||||
## chat
|
## chat
|
||||||
|
|
||||||
Generate chat completions via the running API server.
|
Generate chat completions via the running API server.
|
||||||
|
|||||||
@ -5,7 +5,7 @@ The `vllm serve` command is used to launch the OpenAI-compatible server.
|
|||||||
## CLI Arguments
|
## CLI Arguments
|
||||||
|
|
||||||
The `vllm serve` command is used to launch the OpenAI-compatible server.
|
The `vllm serve` command is used to launch the OpenAI-compatible server.
|
||||||
To see the available CLI arguments, run `vllm serve --help`!
|
To see the available options, take a look at the [CLI Reference](../cli/README.md#options)!
|
||||||
|
|
||||||
## Configuration file
|
## Configuration file
|
||||||
|
|
||||||
|
|||||||
@ -16,6 +16,7 @@ sys.modules["blake3"] = MagicMock()
|
|||||||
sys.modules["vllm._C"] = MagicMock()
|
sys.modules["vllm._C"] = MagicMock()
|
||||||
|
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs # noqa: E402
|
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs # noqa: E402
|
||||||
|
from vllm.entrypoints.openai.cli_args import make_arg_parser # noqa: E402
|
||||||
from vllm.utils import FlexibleArgumentParser # noqa: E402
|
from vllm.utils import FlexibleArgumentParser # noqa: E402
|
||||||
|
|
||||||
logger = logging.getLogger("mkdocs")
|
logger = logging.getLogger("mkdocs")
|
||||||
@ -24,15 +25,18 @@ logger = logging.getLogger("mkdocs")
|
|||||||
class MarkdownFormatter(HelpFormatter):
|
class MarkdownFormatter(HelpFormatter):
|
||||||
"""Custom formatter that generates markdown for argument groups."""
|
"""Custom formatter that generates markdown for argument groups."""
|
||||||
|
|
||||||
def __init__(self, prog):
|
def __init__(self, prog, starting_heading_level=3):
|
||||||
super().__init__(prog,
|
super().__init__(prog,
|
||||||
max_help_position=float('inf'),
|
max_help_position=float('inf'),
|
||||||
width=float('inf'))
|
width=float('inf'))
|
||||||
|
self._section_heading_prefix = "#" * starting_heading_level
|
||||||
|
self._argument_heading_prefix = "#" * (starting_heading_level + 1)
|
||||||
self._markdown_output = []
|
self._markdown_output = []
|
||||||
|
|
||||||
def start_section(self, heading):
|
def start_section(self, heading):
|
||||||
if heading not in {"positional arguments", "options"}:
|
if heading not in {"positional arguments", "options"}:
|
||||||
self._markdown_output.append(f"\n### {heading}\n\n")
|
heading_md = f"\n{self._section_heading_prefix} {heading}\n\n"
|
||||||
|
self._markdown_output.append(heading_md)
|
||||||
|
|
||||||
def end_section(self):
|
def end_section(self):
|
||||||
pass
|
pass
|
||||||
@ -46,9 +50,13 @@ class MarkdownFormatter(HelpFormatter):
|
|||||||
|
|
||||||
def add_arguments(self, actions):
|
def add_arguments(self, actions):
|
||||||
for action in actions:
|
for action in actions:
|
||||||
|
if (len(action.option_strings) == 0
|
||||||
|
or "--help" in action.option_strings):
|
||||||
|
continue
|
||||||
|
|
||||||
option_strings = f'`{"`, `".join(action.option_strings)}`'
|
option_strings = f'`{"`, `".join(action.option_strings)}`'
|
||||||
self._markdown_output.append(f"#### {option_strings}\n\n")
|
heading_md = f"{self._argument_heading_prefix} {option_strings}\n\n"
|
||||||
|
self._markdown_output.append(heading_md)
|
||||||
|
|
||||||
if choices := action.choices:
|
if choices := action.choices:
|
||||||
choices = f'`{"`, `".join(str(c) for c in choices)}`'
|
choices = f'`{"`, `".join(str(c) for c in choices)}`'
|
||||||
@ -81,6 +89,14 @@ def create_parser(cls, **kwargs) -> FlexibleArgumentParser:
|
|||||||
return cls.add_cli_args(parser, **kwargs)
|
return cls.add_cli_args(parser, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def create_serve_parser() -> FlexibleArgumentParser:
    """Build the `vllm serve` argument parser used for docs generation.

    The parser renders its help through `MarkdownFormatter` with a starting
    heading level of 4, then is handed to `make_arg_parser`, which registers
    the serve command's arguments on it.

    Returns:
        The parser returned by `make_arg_parser`.
    """

    def _markdown_formatter(prog):
        # Start one level deeper than MarkdownFormatter's default of 3.
        return MarkdownFormatter(prog, starting_heading_level=4)

    serve_parser = FlexibleArgumentParser()
    serve_parser.formatter_class = _markdown_formatter
    return make_arg_parser(serve_parser)
|
||||||
|
|
||||||
|
|
||||||
def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
|
def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
|
||||||
logger.info("Generating argparse documentation")
|
logger.info("Generating argparse documentation")
|
||||||
logger.debug("Root directory: %s", ROOT_DIR.resolve())
|
logger.debug("Root directory: %s", ROOT_DIR.resolve())
|
||||||
@ -95,6 +111,7 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
|
|||||||
"engine_args": create_parser(EngineArgs),
|
"engine_args": create_parser(EngineArgs),
|
||||||
"async_engine_args": create_parser(AsyncEngineArgs,
|
"async_engine_args": create_parser(AsyncEngineArgs,
|
||||||
async_args_only=True),
|
async_args_only=True),
|
||||||
|
"serve": create_serve_parser(),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Generate documentation for each parser
|
# Generate documentation for each parser
|
||||||
|
|||||||
@ -17,6 +17,7 @@ cloudpickle
|
|||||||
fastapi
|
fastapi
|
||||||
msgspec
|
msgspec
|
||||||
openai
|
openai
|
||||||
|
partial-json-parser
|
||||||
pillow
|
pillow
|
||||||
psutil
|
psutil
|
||||||
pybase64
|
pybase64
|
||||||
|
|||||||
@ -67,37 +67,6 @@ class ServeSubcommand(CLISubcommand):
|
|||||||
help="Start the vLLM OpenAI Compatible API server.",
|
help="Start the vLLM OpenAI Compatible API server.",
|
||||||
description="Start the vLLM OpenAI Compatible API server.",
|
description="Start the vLLM OpenAI Compatible API server.",
|
||||||
usage="vllm serve [model_tag] [options]")
|
usage="vllm serve [model_tag] [options]")
|
||||||
serve_parser.add_argument("model_tag",
|
|
||||||
type=str,
|
|
||||||
nargs='?',
|
|
||||||
help="The model tag to serve "
|
|
||||||
"(optional if specified in config)")
|
|
||||||
serve_parser.add_argument(
|
|
||||||
"--headless",
|
|
||||||
action='store_true',
|
|
||||||
default=False,
|
|
||||||
help="Run in headless mode. See multi-node data parallel "
|
|
||||||
"documentation for more details.")
|
|
||||||
serve_parser.add_argument(
|
|
||||||
'--data-parallel-start-rank',
|
|
||||||
'-dpr',
|
|
||||||
type=int,
|
|
||||||
default=0,
|
|
||||||
help="Starting data parallel rank for secondary nodes. "
|
|
||||||
"Requires --headless.")
|
|
||||||
serve_parser.add_argument('--api-server-count',
|
|
||||||
'-asc',
|
|
||||||
type=int,
|
|
||||||
default=1,
|
|
||||||
help='How many API server processes to run.')
|
|
||||||
serve_parser.add_argument(
|
|
||||||
"--config",
|
|
||||||
type=str,
|
|
||||||
default='',
|
|
||||||
required=False,
|
|
||||||
help="Read CLI options from a config file. "
|
|
||||||
"Must be a YAML with the following options: "
|
|
||||||
"https://docs.vllm.ai/en/latest/configuration/serve_args.html")
|
|
||||||
|
|
||||||
serve_parser = make_arg_parser(serve_parser)
|
serve_parser = make_arg_parser(serve_parser)
|
||||||
show_filtered_argument_or_group_from_help(serve_parser, ["serve"])
|
show_filtered_argument_or_group_from_help(serve_parser, ["serve"])
|
||||||
|
|||||||
@ -236,6 +236,34 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
|||||||
register all arguments instead of manually enumerating them here. This
|
register all arguments instead of manually enumerating them here. This
|
||||||
avoids code duplication and keeps the argument definitions in one place.
|
avoids code duplication and keeps the argument definitions in one place.
|
||||||
"""
|
"""
|
||||||
|
parser.add_argument("model_tag",
|
||||||
|
type=str,
|
||||||
|
nargs="?",
|
||||||
|
help="The model tag to serve "
|
||||||
|
"(optional if specified in config)")
|
||||||
|
parser.add_argument(
|
||||||
|
"--headless",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Run in headless mode. See multi-node data parallel "
|
||||||
|
"documentation for more details.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--data-parallel-start-rank",
|
||||||
|
"-dpr",
|
||||||
|
type=int,
|
||||||
|
default=0,
|
||||||
|
help="Starting data parallel rank for secondary nodes. "
|
||||||
|
"Requires --headless.")
|
||||||
|
parser.add_argument("--api-server-count",
|
||||||
|
"-asc",
|
||||||
|
type=int,
|
||||||
|
default=1,
|
||||||
|
help="How many API server processes to run.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--config",
|
||||||
|
help="Read CLI options from a config file. "
|
||||||
|
"Must be a YAML with the following options: "
|
||||||
|
"https://docs.vllm.ai/en/latest/configuration/serve_args.html")
|
||||||
parser = FrontendArgs.add_cli_args(parser)
|
parser = FrontendArgs.add_cli_args(parser)
|
||||||
parser = AsyncEngineArgs.add_cli_args(parser)
|
parser = AsyncEngineArgs.add_cli_args(parser)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user