diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index cddfd672e7ab..813556f90f53 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -726,15 +726,13 @@ def add_cli_args(parser: argparse.ArgumentParser): default="ttft,tpot,itl", help="Comma-seperated list of selected metrics to report percentils. " "This argument specifies the metrics to report percentiles. " - "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". " - "Default value is \"ttft,tpot,itl\".") + "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". ") parser.add_argument( "--metric-percentiles", type=str, default="99", help="Comma-seperated list of percentiles for selected metrics. " "To report 25-th, 50-th, and 75-th percentiles, use \"25,50,75\". " - "Default value is \"99\". " "Use \"--percentile-metrics\" to select metrics.", ) parser.add_argument( diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 6f498af36a40..ca511c7434f8 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -322,9 +322,7 @@ class EngineArgs: parser.add_argument('--download-dir', type=nullable_str, default=EngineArgs.download_dir, - help='Directory to download and load the weights, ' - 'default to the default cache dir of ' - 'huggingface.') + help='Directory to download and load the weights.') parser.add_argument( '--load-format', type=str, @@ -399,8 +397,7 @@ class EngineArgs: 'Valid backend values are "xgrammar", "guidance", and "auto". ' 'With "auto", we will make opinionated choices based on request' 'contents and what the backend libraries currently support, so ' - 'the behavior is subject to change in each release. ' - 'The default is xgrammar.') + 'the behavior is subject to change in each release.') parser.add_argument( '--logits-processor-pattern', type=nullable_str, @@ -493,8 +490,7 @@ class EngineArgs: default=EngineArgs.prefix_caching_hash_algo, help="Set the hash algorithm for prefix caching. " "Options are 'builtin' (Python's built-in hash) or 'sha256' " - "(collision resistant but with certain overheads). Defaults " - "to 'builtin'.", + "(collision resistant but with certain overheads).", ) parser.add_argument('--disable-sliding-window', action='store_true', @@ -568,9 +564,7 @@ class EngineArgs: type=int, default=EngineArgs.max_num_partial_prefills, help="For chunked prefill, the max number of concurrent \ - partial prefills." - "Defaults to 1", - ) + partial prefills.") parser.add_argument( "--max-long-partial-prefills", type=int, @@ -579,15 +573,13 @@ class EngineArgs: "than --long-prefill-token-threshold that will be prefilled " "concurrently. Setting this less than --max-num-partial-prefills " "will allow shorter prompts to jump the queue in front of longer " - "prompts in some cases, improving latency. Defaults to 1.") + "prompts in some cases, improving latency.") parser.add_argument( "--long-prefill-token-threshold", type=float, default=EngineArgs.long_prefill_token_threshold, help="For chunked prefill, a request is considered long if the " - "prompt is longer than this number of tokens. Defaults to 4%% of " - "the model's context length.", - ) + "prompt is longer than this number of tokens.") parser.add_argument('--max-num-seqs', type=int, default=EngineArgs.max_num_seqs, @@ -739,8 +731,7 @@ class EngineArgs: type=int, default=EngineArgs.max_cpu_loras, help=('Maximum number of LoRAs to store in CPU memory. ' - 'Must be >= than max_loras. ' - 'Defaults to max_loras.')) + 'Must be >= than max_loras.')) parser.add_argument( '--fully-sharded-loras', action='store_true', @@ -894,7 +885,7 @@ class EngineArgs: help='Set the lower bound threshold for the posterior ' 'probability of a token to be accepted. This threshold is ' 'used by the TypicalAcceptanceSampler to make sampling decisions ' - 'during speculative decoding. Defaults to 0.09') + 'during speculative decoding.') parser.add_argument( '--typical-acceptance-sampler-posterior-alpha', diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py index e956920c2f9a..218a8fbe10b7 100644 --- a/vllm/entrypoints/openai/cli_args.py +++ b/vllm/entrypoints/openai/cli_args.py @@ -247,7 +247,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: default=None, help='Max number of prompt characters or prompt ' 'ID numbers being printed in log.' - '\n\nDefault: Unlimited') + ' The default of None means unlimited.') parser.add_argument( "--disable-fastapi-docs", diff --git a/vllm/utils.py b/vllm/utils.py index afe68a2b8cb3..bf83b38ace80 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -1212,7 +1212,7 @@ class StoreBoolean(argparse.Action): "Expected 'true' or 'false'.") -class SortedHelpFormatter(argparse.HelpFormatter): +class SortedHelpFormatter(argparse.ArgumentDefaultsHelpFormatter): """SortedHelpFormatter that sorts arguments by their option strings.""" def add_arguments(self, actions):