diff --git a/vllm/entrypoints/cli/benchmark/main.py b/vllm/entrypoints/cli/benchmark/main.py index 2ff98577c3634..48f34fce1d44c 100644 --- a/vllm/entrypoints/cli/benchmark/main.py +++ b/vllm/entrypoints/cli/benchmark/main.py @@ -32,6 +32,7 @@ class BenchmarkSubcommand(CLISubcommand): ) -> FlexibleArgumentParser: bench_parser = subparsers.add_parser( self.name, + help=self.help, description=self.help, usage=f"vllm {self.name} [options]", ) diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py index 96608f360e17b..77c7253aef06e 100644 --- a/vllm/entrypoints/cli/serve.py +++ b/vllm/entrypoints/cli/serve.py @@ -66,7 +66,11 @@ class ServeSubcommand(CLISubcommand): self, subparsers: argparse._SubParsersAction ) -> FlexibleArgumentParser: serve_parser = subparsers.add_parser( - self.name, description=DESCRIPTION, usage="vllm serve [model_tag] [options]" + self.name, + help="Launch a local OpenAI-compatible API server to serve LLM " + "completions via HTTP.", + description=DESCRIPTION, + usage="vllm serve [model_tag] [options]", ) serve_parser = make_arg_parser(serve_parser)