[Benchmark] Change mii to use persistent deployment and support tensor parallel (#3628)
This commit is contained in:
parent 0267fef52a
commit 98a42e7078
@@ -183,13 +183,15 @@ def run_mii(
     tensor_parallel_size: int,
     output_len: int,
 ) -> float:
-    from mii import pipeline
-    llm = pipeline(model, tensor_parallel=tensor_parallel_size)
+    from mii import client, serve
+    llm = serve(model, tensor_parallel=tensor_parallel_size)
     prompts = [prompt for prompt, _, _ in requests]
 
     start = time.perf_counter()
-    llm(prompts, max_new_tokens=output_len)
+    llm.generate(prompts, max_new_tokens=output_len)
     end = time.perf_counter()
+    client = client(model)
+    client.terminate_server()
     return end - start
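For readers following along, here is a minimal sketch of run_mii as it stands after this patch. The requests and model parameters and the time import are inferred from the diff context rather than shown in it, so treat the signature and comments as assumptions; the MII calls themselves (serve, generate, client, terminate_server) are exactly those introduced by the patch.

    import time
    from typing import List, Tuple

    def run_mii(
        requests: List[Tuple[str, int, int]],  # assumed: (prompt, prompt_len, output_len)
        model: str,                            # assumed: HF model name or path
        tensor_parallel_size: int,
        output_len: int,
    ) -> float:
        from mii import client, serve

        # serve() launches a persistent MII deployment (replacing the old
        # in-process pipeline()) and returns a handle used for generation.
        llm = serve(model, tensor_parallel=tensor_parallel_size)
        prompts = [prompt for prompt, _, _ in requests]

        start = time.perf_counter()
        llm.generate(prompts, max_new_tokens=output_len)
        end = time.perf_counter()

        # Reconnect a client to the deployment by name and shut the server
        # down so the benchmark does not leave GPU workers running. The
        # rebinding shadows the imported client() function, as in the patch.
        client = client(model)
        client.terminate_server()
        return end - start

The likely design point behind the commit title: the non-persistent pipeline() path runs the model in-process and relies on an external distributed launcher to use multiple GPUs, whereas serve() starts a standalone deployment that spawns and manages its own tensor-parallel workers. Because that deployment outlives the generate() call that exercised it, the explicit terminate_server() at the end becomes necessary.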