From 466166dcfdc40f85f4043a94b9a53099af4a0850 Mon Sep 17 00:00:00 2001
From: NekoMimiUnagi
Date: Thu, 19 Jun 2025 02:21:41 -0500
Subject: [PATCH] [Frontend] Add optional token-level progress bar to
 `LLM.beam_search` (#19301)

Signed-off-by: Ruosen Li
Signed-off-by: Aaron Pham
Signed-off-by: Ubuntu
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com>
---
 vllm/entrypoints/llm.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index f3170fa30fce1..87810772fc2e2 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -552,6 +552,7 @@ class LLM:
         prompts: list[Union[TokensPrompt, TextPrompt]],
         params: BeamSearchParams,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
+        use_tqdm: bool = False,
     ) -> list[BeamSearchOutput]:
         """
         Generate sequences using beam search.
@@ -561,6 +562,7 @@ class LLM:
                 of token IDs.
             params: The beam search parameters.
             lora_request: LoRA request to use for generation, if any.
+            use_tqdm: Whether to use tqdm to display the progress bar.
         """
         # TODO: how does beam search work together with length penalty,
         # frequency, penalty, and stopping criteria, etc.?
@@ -623,7 +625,18 @@ class LLM:
                     **mm_kwargs,
                 ),
             )
-        for _ in range(max_tokens):
+        token_iter = range(max_tokens)
+        if use_tqdm:
+            token_iter = tqdm(token_iter,
+                              desc="Beam search",
+                              unit="token",
+                              unit_scale=False)
+            logger.warning(
+                "The progress bar shows the upper bound on token steps and "
+                "may finish early due to stopping conditions. It does not "
+                "reflect instance-level progress.")
+
+        for _ in token_iter:
             all_beams: list[BeamSearchSequence] = list(
                 sum((instance.beams for instance in instances), []))
             pos = [0] + list(