mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 03:15:20 +08:00
[benchmark] add max-concurrency in result table (#21095)
Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
This commit is contained in:
parent
fc91da5499
commit
533db0935d
@@ -413,6 +413,10 @@ async def benchmark(
|
|||||||
|
|
||||||
print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
|
print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
|
||||||
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
|
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
|
||||||
|
if max_concurrency is not None:
|
||||||
|
print("{:<40} {:<10}".format("Maximum request concurrency:", max_concurrency))
|
||||||
|
if request_rate != float("inf"):
|
||||||
|
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):", request_rate))
|
||||||
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
|
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
|
||||||
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
|
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
|
||||||
print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))
|
print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))
|
||||||
|
|||||||
@@ -555,6 +555,10 @@ async def benchmark(
|
|||||||
|
|
||||||
print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
|
print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
|
||||||
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
|
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
|
||||||
|
if max_concurrency is not None:
|
||||||
|
print("{:<40} {:<10}".format("Maximum request concurrency:", max_concurrency))
|
||||||
|
if request_rate != float("inf"):
|
||||||
|
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):", request_rate))
|
||||||
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
|
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
|
||||||
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
|
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
|
||||||
print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))
|
print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))
|
||||||
|
|||||||
@@ -486,6 +486,12 @@ async def benchmark(
|
|||||||
|
|
||||||
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
|
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
|
||||||
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
|
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
|
||||||
|
if max_concurrency is not None:
|
||||||
|
print("{:<40} {:<10}".format("Maximum request concurrency:",
|
||||||
|
max_concurrency))
|
||||||
|
if request_rate != float('inf'):
|
||||||
|
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):",
|
||||||
|
request_rate ))
|
||||||
print("{:<40} {:<10.2f}".format("Benchmark duration (s):",
|
print("{:<40} {:<10.2f}".format("Benchmark duration (s):",
|
||||||
benchmark_duration))
|
benchmark_duration))
|
||||||
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
|
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user