[Misc] cli auto show default value (#15582)

Signed-off-by: reidliu41 <reid201711@gmail.com>

commit 26df46ee59 (parent c3f687ac22)
@@ -726,15 +726,13 @@ def add_cli_args(parser: argparse.ArgumentParser):
         default="ttft,tpot,itl",
         help="Comma-seperated list of selected metrics to report percentils. "
         "This argument specifies the metrics to report percentiles. "
-        "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". "
-        "Default value is \"ttft,tpot,itl\".")
+        "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". ")
     parser.add_argument(
         "--metric-percentiles",
         type=str,
         default="99",
         help="Comma-seperated list of percentiles for selected metrics. "
         "To report 25-th, 50-th, and 75-th percentiles, use \"25,50,75\". "
-        "Default value is \"99\". "
         "Use \"--percentile-metrics\" to select metrics.",
     )
     parser.add_argument(
@@ -322,9 +322,7 @@ class EngineArgs:
         parser.add_argument('--download-dir',
                             type=nullable_str,
                             default=EngineArgs.download_dir,
-                            help='Directory to download and load the weights, '
-                            'default to the default cache dir of '
-                            'huggingface.')
+                            help='Directory to download and load the weights.')
         parser.add_argument(
             '--load-format',
             type=str,
@@ -399,8 +397,7 @@ class EngineArgs:
             'Valid backend values are "xgrammar", "guidance", and "auto". '
             'With "auto", we will make opinionated choices based on request'
             'contents and what the backend libraries currently support, so '
-            'the behavior is subject to change in each release. '
-            'The default is xgrammar.')
+            'the behavior is subject to change in each release.')
         parser.add_argument(
             '--logits-processor-pattern',
             type=nullable_str,
@@ -493,8 +490,7 @@ class EngineArgs:
             default=EngineArgs.prefix_caching_hash_algo,
             help="Set the hash algorithm for prefix caching. "
             "Options are 'builtin' (Python's built-in hash) or 'sha256' "
-            "(collision resistant but with certain overheads). Defaults "
-            "to 'builtin'.",
+            "(collision resistant but with certain overheads).",
         )
         parser.add_argument('--disable-sliding-window',
                             action='store_true',
@@ -568,9 +564,7 @@ class EngineArgs:
             type=int,
             default=EngineArgs.max_num_partial_prefills,
             help="For chunked prefill, the max number of concurrent \
-            partial prefills."
-            "Defaults to 1",
-        )
+            partial prefills.")
         parser.add_argument(
             "--max-long-partial-prefills",
             type=int,
@@ -579,15 +573,13 @@ class EngineArgs:
             "than --long-prefill-token-threshold that will be prefilled "
             "concurrently. Setting this less than --max-num-partial-prefills "
             "will allow shorter prompts to jump the queue in front of longer "
-            "prompts in some cases, improving latency. Defaults to 1.")
+            "prompts in some cases, improving latency.")
         parser.add_argument(
             "--long-prefill-token-threshold",
             type=float,
             default=EngineArgs.long_prefill_token_threshold,
             help="For chunked prefill, a request is considered long if the "
-            "prompt is longer than this number of tokens. Defaults to 4%% of "
-            "the model's context length.",
-        )
+            "prompt is longer than this number of tokens.")
         parser.add_argument('--max-num-seqs',
                             type=int,
                             default=EngineArgs.max_num_seqs,
@@ -739,8 +731,7 @@ class EngineArgs:
             type=int,
             default=EngineArgs.max_cpu_loras,
             help=('Maximum number of LoRAs to store in CPU memory. '
-                  'Must be >= than max_loras. '
-                  'Defaults to max_loras.'))
+                  'Must be >= than max_loras.'))
         parser.add_argument(
             '--fully-sharded-loras',
             action='store_true',
@@ -894,7 +885,7 @@ class EngineArgs:
             help='Set the lower bound threshold for the posterior '
             'probability of a token to be accepted. This threshold is '
             'used by the TypicalAcceptanceSampler to make sampling decisions '
-            'during speculative decoding. Defaults to 0.09')
+            'during speculative decoding.')
 
         parser.add_argument(
             '--typical-acceptance-sampler-posterior-alpha',
@@ -247,7 +247,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         default=None,
         help='Max number of prompt characters or prompt '
         'ID numbers being printed in log.'
-        '\n\nDefault: Unlimited')
+        ' The default of None means unlimited.')
 
     parser.add_argument(
         "--disable-fastapi-docs",
@@ -1212,7 +1212,7 @@ class StoreBoolean(argparse.Action):
                          "Expected 'true' or 'false'.")
 
 
-class SortedHelpFormatter(argparse.HelpFormatter):
+class SortedHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
     """SortedHelpFormatter that sorts arguments by their option strings."""
 
     def add_arguments(self, actions):
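The central change is the last hunk: SortedHelpFormatter now derives from argparse.ArgumentDefaultsHelpFormatter instead of argparse.HelpFormatter, so every --help entry automatically gains a "(default: ...)" suffix, and the hand-written "Defaults to ..." fragments removed in the earlier hunks become redundant. The sketch below is a minimal, self-contained illustration of that behavior; it is not the vLLM code, the class and argument names are invented for the example, and the sort key in add_arguments is only an assumption based on the docstring shown in the diff.

# Minimal illustrative sketch, not vLLM code: inheriting from
# argparse.ArgumentDefaultsHelpFormatter makes --help print each default.
import argparse


class SortedDefaultsHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Sort arguments by option strings and show default values in help."""

    def add_arguments(self, actions):
        # Assumed sort key: the argument's option strings (e.g. "--download-dir").
        actions = sorted(actions, key=lambda action: action.option_strings)
        super().add_arguments(actions)


parser = argparse.ArgumentParser(formatter_class=SortedDefaultsHelpFormatter)
parser.add_argument("--metric-percentiles",
                    type=str,
                    default="99",
                    help="Comma-separated list of percentiles for selected metrics.")
parser.add_argument("--download-dir",
                    default=None,
                    help="Directory to download and load the weights.")

# --help now renders something like
#   --metric-percentiles METRIC_PERCENTILES
#       Comma-separated list of percentiles for selected metrics. (default: 99)
# so the help strings no longer need to repeat the default by hand.
parser.print_help()

With a formatter like this in place, deleting the "Default value is ..." sentences from individual help strings, as the hunks above do, loses no information in the --help output.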