From 0967110e42a33d016c0cf5214bd32c8ce3faae19 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 10 Mar 2025 14:23:48 -0700 Subject: [PATCH] [Minor] Update the tqdm bar for parallel sampling (#14571) Signed-off-by: Woosuk Kwon --- vllm/entrypoints/llm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 6c2e87416b94..e8f3c1f4e50b 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1384,8 +1384,9 @@ class LLM: if use_tqdm: if isinstance(output, RequestOutput): # Calculate tokens only for RequestOutput + n = len(output.outputs) assert output.prompt_token_ids is not None - total_in_toks += len(output.prompt_token_ids) + total_in_toks += len(output.prompt_token_ids) * n in_spd = total_in_toks / pbar.format_dict["elapsed"] total_out_toks += sum( len(stp.token_ids) for stp in output.outputs) @@ -1394,7 +1395,7 @@ class LLM: pbar.postfix = ( f"est. speed input: {in_spd:.2f} toks/s, " f"output: {out_spd:.2f} toks/s") - pbar.update(len(output.outputs)) + pbar.update(n) else: pbar.update(1)