Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Frontend] Add --logprobs argument to benchmark_serving.py (#8191)
commit e5cab71531 (parent baa5467547)

This change threads an optional logprobs value from the benchmark command line, through RequestFuncInput, and into the OpenAI-style completions request payload; a test docstring is updated to match the new semantics.
@@ -24,6 +24,7 @@ class RequestFuncInput:
     model: str
     best_of: int = 1
     use_beam_search: bool = False
+    logprobs: Optional[int] = None


 @dataclass
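For orientation, a minimal sketch of constructing the extended dataclass; the values are illustrative, and the fields not visible in this hunk (prompt, api_url, prompt_len, output_len) are assumed from the surrounding file:

    # Hypothetical construction of RequestFuncInput with the new field.
    request_func_input = RequestFuncInput(
        prompt="Hello, world",                           # assumed field
        api_url="http://localhost:8000/v1/completions",  # assumed field
        prompt_len=4,                                    # assumed field
        output_len=128,                                  # assumed field
        model="my-model",
        logprobs=5,  # new: request 5 logprobs per token; None keeps the prior behavior
    )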
@@ -236,6 +237,7 @@ async def async_request_openai_completions(
         "temperature": 0.0,
         "best_of": request_func_input.best_of,
         "max_tokens": request_func_input.output_len,
+        "logprobs": request_func_input.logprobs,
         "stream": True,
     }
     headers = {
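The new key forwards the per-request value straight into the OpenAI-style completions payload. A minimal sketch of the resulting request body when logprobs=5, with illustrative values for the other fields (the "prompt" key is assumed from the surrounding code):

    payload = {
        "model": "my-model",
        "prompt": "Hello, world",  # assumed to accompany the fields shown above
        "temperature": 0.0,
        "best_of": 1,
        "max_tokens": 128,
        "logprobs": 5,             # None when --logprobs is not supplied
        "stream": True,
    }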
@@ -318,6 +318,7 @@ async def benchmark(
     model_id: str,
     tokenizer: PreTrainedTokenizerBase,
     input_requests: List[Tuple[str, int, int]],
+    logprobs: Optional[int],
     best_of: int,
     use_beam_search: bool,
     request_rate: float,
@@ -339,6 +340,7 @@ async def benchmark(
         api_url=api_url,
         prompt_len=test_prompt_len,
         output_len=test_output_len,
+        logprobs=logprobs,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
@@ -358,6 +360,7 @@ async def benchmark(
             api_url=base_url + "/start_profile",
             prompt_len=test_prompt_len,
             output_len=test_output_len,
+            logprobs=logprobs,
             best_of=best_of,
             use_beam_search=use_beam_search,
         )
@@ -379,6 +382,7 @@ async def benchmark(
             api_url=api_url,
             prompt_len=prompt_len,
             output_len=output_len,
+            logprobs=logprobs,
             best_of=best_of,
             use_beam_search=use_beam_search,
         )
@@ -396,6 +400,7 @@ async def benchmark(
             api_url=base_url + "/stop_profile",
             prompt_len=test_prompt_len,
             output_len=test_output_len,
+            logprobs=logprobs,
             best_of=best_of,
             use_beam_search=use_beam_search,
         )
@@ -580,6 +585,7 @@ def main(args: argparse.Namespace):
             model_id=model_id,
             tokenizer=tokenizer,
             input_requests=input_requests,
+            logprobs=args.logprobs,
             best_of=args.best_of,
             use_beam_search=args.use_beam_search,
             request_rate=args.request_rate,
@@ -721,6 +727,16 @@ if __name__ == "__main__":
         help=
         "Number of output tokens per request, used only for sonnet dataset.",
     )
+    parser.add_argument(
+        "--logprobs",
+        type=int,
+        default=None,
+        help=("Number of logprobs-per-token to compute & return as part of "
+              "the request. If unspecified, then either (1) if beam search "
+              "is disabled, no logprobs are computed & a single dummy "
+              "logprob is returned for each token; or (2) if beam search "
+              "is enabled 1 logprob per token is computed"),
+    )
     parser.add_argument(
         "--sonnet-prefix-len",
         type=int,
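A standalone sketch of the new flag's parsing behavior, mirroring the add_argument call above (not the script itself):

    import argparse

    parser = argparse.ArgumentParser()
    # Same spec as the diff: an optional integer that defaults to None.
    parser.add_argument("--logprobs", type=int, default=None)

    print(parser.parse_args([]).logprobs)                   # None -> default behavior
    print(parser.parse_args(["--logprobs", "5"]).logprobs)  # 5

In a real run this would look something like `python benchmarks/benchmark_serving.py ... --logprobs 5`, with the remaining flags taken from the script's existing interface.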
@@ -57,7 +57,7 @@ def test_multi_step_llm(
                           GPU -> CPU output transfer
       num_prompts: number of example prompts under test
       num_logprobs: corresponds to the `logprobs` argument to the OpenAI
-                    completions endpoint; `None` -> no logprobs
+                    completions endpoint; `None` -> 1 logprob returned.
     """

     prompts = example_prompts