[Minor] Update the tqdm bar for parallel sampling (#14571)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon 2025-03-10 14:23:48 -07:00 committed by GitHub
parent fb0acb6c72
commit 0967110e42
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1384,8 +1384,9 @@ class LLM:
if use_tqdm:
if isinstance(output, RequestOutput):
# Calculate tokens only for RequestOutput
+ n = len(output.outputs)
assert output.prompt_token_ids is not None
- total_in_toks += len(output.prompt_token_ids)
+ total_in_toks += len(output.prompt_token_ids) * n
in_spd = total_in_toks / pbar.format_dict["elapsed"]
total_out_toks += sum(
len(stp.token_ids) for stp in output.outputs)
@@ -1394,7 +1395,7 @@ class LLM:
pbar.postfix = (
f"est. speed input: {in_spd:.2f} toks/s, "
f"output: {out_spd:.2f} toks/s")
- pbar.update(len(output.outputs))
+ pbar.update(n)
else:
pbar.update(1)