Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-15 11:36:20 +08:00)
[Misc] Benchmark Serving Script Support Appending Results (#17028)
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
parent 2f54045508
commit 881f735827
@@ -713,7 +713,7 @@ def main(args: argparse.Namespace):
         ))

     # Save config and results to json
-    if args.save_result:
+    if args.save_result or args.append_result:
         result_json: dict[str, Any] = {}

         # Setup
@@ -734,6 +734,14 @@ def main(args: argparse.Namespace):
                     raise ValueError(
                         "Invalid metadata format. Please use KEY=VALUE format."
                     )
+        # Traffic
+        result_json["request_rate"] = (args.request_rate if args.request_rate
+                                       < float("inf") else "inf")
+        result_json["burstiness"] = args.burstiness
+        result_json["max_concurrency"] = args.max_concurrency
+
+        # Merge with benchmark result
+        result_json = {**result_json, **benchmark_result}
+
         if not args.save_detailed:
             # Remove fields with too many data points
@@ -744,15 +752,6 @@ def main(args: argparse.Namespace):
                 if field in result_json:
                     del result_json[field]

-        # Traffic
-        result_json["request_rate"] = (args.request_rate if args.request_rate
-                                       < float("inf") else "inf")
-        result_json["burstiness"] = args.burstiness
-        result_json["max_concurrency"] = args.max_concurrency
-
-        # Merge with benchmark result
-        result_json = {**result_json, **benchmark_result}
-
         # Save to file
         base_model_id = model_id.split("/")[-1]
         max_concurrency_str = (f"-concurrency{args.max_concurrency}"
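The two hunks above relocate the traffic parameters and the merge with benchmark_result ahead of the save_detailed filtering; the merge itself is a plain dict unpack in which later keys win. A rough illustration under assumed contents (the benchmark_result keys here are made up, not taken from the script):

    # Illustrative shapes only; real benchmark_result keys depend on the script.
    result_json = {"request_rate": "inf", "burstiness": 1.0, "max_concurrency": None}
    benchmark_result = {"duration": 12.3, "completed": 128}  # assumed keys

    # Same {**a, **b} merge as in the diff: benchmark_result wins on key collisions.
    result_json = {**result_json, **benchmark_result}
    print(result_json)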
@@ -762,7 +761,12 @@ def main(args: argparse.Namespace):
         file_name = args.result_filename
         if args.result_dir:
             file_name = os.path.join(args.result_dir, file_name)
-        with open(file_name, "w", encoding='utf-8') as outfile:
+        with open(file_name,
+                  mode="a+" if args.append_result else "w",
+                  encoding='utf-8') as outfile:
+            # Append a newline.
+            if args.append_result and outfile.tell() != 0:
+                outfile.write("\n")
             json.dump(result_json, outfile)
         save_to_pytorch_benchmark_format(args, result_json, file_name)
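With appending enabled, each run is written as one JSON object and a newline is inserted before every record after the first, so the output file ends up as newline-delimited JSON. A minimal sketch of reading such a file back, assuming a hypothetical file name results.json produced by repeated runs:

    import json

    # Hypothetical path; one benchmark run's result_json per line.
    with open("results.json", encoding="utf-8") as f:
        runs = [json.loads(line) for line in f if line.strip()]

    for run in runs:
        # "request_rate", "burstiness" and "max_concurrency" are written by the
        # code above; the remaining keys come from benchmark_result.
        print(run.get("request_rate"), run.get("max_concurrency"))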
@@ -894,6 +898,11 @@ if __name__ == "__main__":
         help="When saving the results, whether to include per request "
         "information such as response, error, ttfs, tpots, etc.",
     )
+    parser.add_argument(
+        "--append-result",
+        action="store_true",
+        help="Append the benchmark result to the existing json file.",
+    )
     parser.add_argument(
         "--metadata",
         metavar="KEY=VALUE",
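The new flag is a plain store_true option, so it defaults to False and the write path behaves exactly as before unless --append-result is passed. The open-mode and newline logic can be mirrored in isolation; a small sketch under assumed inputs (the file name and result dicts are made up):

    import json

    def write_result(result_json: dict, file_name: str, append: bool) -> None:
        # Same pattern as the diff: "a+" when appending, otherwise overwrite.
        with open(file_name, mode="a+" if append else "w", encoding="utf-8") as outfile:
            # Separate records with a newline once the file already has content.
            if append and outfile.tell() != 0:
                outfile.write("\n")
            json.dump(result_json, outfile)

    # Two runs accumulated in the same (hypothetical) file.
    write_result({"run": 1, "request_rate": "inf"}, "results.json", append=True)
    write_result({"run": 2, "request_rate": 4.0}, "results.json", append=True)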