[Benchmark] Change mii to use persistent deployment and support tensor parallel (#3628)

This commit is contained in:
Yile (Michael) Gu 2024-03-28 17:33:52 -07:00 committed by GitHub
parent 0267fef52a
commit 98a42e7078
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -183,13 +183,15 @@ def run_mii(
     tensor_parallel_size: int,
     output_len: int,
 ) -> float:
-    from mii import pipeline
-    llm = pipeline(model, tensor_parallel=tensor_parallel_size)
+    from mii import client, serve
+    llm = serve(model, tensor_parallel=tensor_parallel_size)
     prompts = [prompt for prompt, _, _ in requests]
     start = time.perf_counter()
-    llm(prompts, max_new_tokens=output_len)
+    llm.generate(prompts, max_new_tokens=output_len)
     end = time.perf_counter()
+    client = client(model)
+    client.terminate_server()
     return end - start