mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 09:35:50 +08:00
[CLI] add --max-tokens to vllm complete (#28109)
Signed-off-by: Iceber Gu <caiwei95@hotmail.com>
This commit is contained in:
parent
72b1c2ae2c
commit
e0d6b4a867
@ -195,10 +195,15 @@ class CompleteCommand(CLISubcommand):
|
|||||||
def cmd(args: argparse.Namespace) -> None:
|
def cmd(args: argparse.Namespace) -> None:
|
||||||
model_name, client = _interactive_cli(args)
|
model_name, client = _interactive_cli(args)
|
||||||
|
|
||||||
|
kwargs = {
|
||||||
|
"model": model_name,
|
||||||
|
"stream": True,
|
||||||
|
}
|
||||||
|
if args.max_tokens:
|
||||||
|
kwargs["max_tokens"] = args.max_tokens
|
||||||
|
|
||||||
if args.quick:
|
if args.quick:
|
||||||
stream = client.completions.create(
|
stream = client.completions.create(prompt=args.quick, **kwargs)
|
||||||
model=model_name, prompt=args.quick, stream=True
|
|
||||||
)
|
|
||||||
_print_completion_stream(stream)
|
_print_completion_stream(stream)
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -208,15 +213,18 @@ class CompleteCommand(CLISubcommand):
|
|||||||
input_prompt = input("> ")
|
input_prompt = input("> ")
|
||||||
except EOFError:
|
except EOFError:
|
||||||
break
|
break
|
||||||
stream = client.completions.create(
|
stream = client.completions.create(prompt=input_prompt, **kwargs)
|
||||||
model=model_name, prompt=input_prompt, stream=True
|
|
||||||
)
|
|
||||||
_print_completion_stream(stream)
|
_print_completion_stream(stream)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||||
"""Add CLI arguments for the complete command."""
|
"""Add CLI arguments for the complete command."""
|
||||||
_add_query_options(parser)
|
_add_query_options(parser)
|
||||||
|
parser.add_argument(
|
||||||
|
"--max-tokens",
|
||||||
|
type=int,
|
||||||
|
help="Maximum number of tokens to generate per output sequence.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-q",
|
"-q",
|
||||||
"--quick",
|
"--quick",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user