From 6cd4ae8acdaacdfc89d2e025b24ba1f1927af260 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Tue, 10 Jun 2025 12:55:09 +0800 Subject: [PATCH] [Frontend] Add tqdm_leave_pbar to control progress bar visibility (#19357) Signed-off-by: reidliu41 Co-authored-by: reidliu41 --- vllm/entrypoints/llm.py | 82 ++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 30 deletions(-) diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 7c9ca41dd7940..6e3cb18fc5595 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -281,7 +281,7 @@ class LLM: sampling_params: Optional[Union[SamplingParams, Sequence[SamplingParams]]] = None, *, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, guided_options_request: Optional[Union[LLMGuidedOptions, @@ -297,7 +297,7 @@ class LLM: sampling_params: Optional[Union[SamplingParams, list[SamplingParams]]] = None, prompt_token_ids: Optional[list[int]] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, guided_options_request: Optional[Union[LLMGuidedOptions, @@ -313,7 +313,7 @@ class LLM: sampling_params: Optional[Union[SamplingParams, list[SamplingParams]]] = None, prompt_token_ids: Optional[list[list[int]]] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, guided_options_request: Optional[Union[LLMGuidedOptions, @@ -330,7 +330,7 @@ class LLM: list[SamplingParams]]] = None, *, prompt_token_ids: list[int], - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, guided_options_request: Optional[Union[LLMGuidedOptions, @@ -347,7 +347,7 @@ class LLM: list[SamplingParams]]] = None, *, prompt_token_ids: list[list[int]], - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, guided_options_request: Optional[Union[LLMGuidedOptions, @@ -362,7 +362,7 @@ class LLM: prompts: None, sampling_params: None, prompt_token_ids: Union[list[int], list[list[int]]], - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, guided_options_request: Optional[Union[LLMGuidedOptions, @@ -382,7 +382,7 @@ class LLM: sampling_params: Optional[Union[SamplingParams, Sequence[SamplingParams]]] = None, prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, guided_options_request: Optional[Union[LLMGuidedOptions, @@ -404,7 +404,10 @@ class LLM: When it is a single value, it is applied to every prompt. When it is a list, the list must have the same length as the prompts and it is paired one by one with the prompt. - use_tqdm: Whether to use tqdm to display the progress bar. + use_tqdm: If `True`, shows a tqdm progress bar. + If a callable (e.g., `functools.partial(tqdm, leave=False)`), + it is used to create the progress bar. + If `False`, no progress bar is created. lora_request: LoRA request to use for generation, if any. prompt_adapter_request: Prompt Adapter request to use for generation, if any. @@ -678,7 +681,7 @@ class LLM: list[list[ChatCompletionMessageParam]]], sampling_params: Optional[Union[SamplingParams, list[SamplingParams]]] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[LoRARequest] = None, chat_template: Optional[str] = None, chat_template_content_format: ChatTemplateContentFormatOption = "auto", @@ -709,7 +712,10 @@ class LLM: is a single value, it is applied to every prompt. When it is a list, the list must have the same length as the prompts and it is paired one by one with the prompt. - use_tqdm: Whether to use tqdm to display the progress bar. + use_tqdm: If `True`, shows a tqdm progress bar. + If a callable (e.g., `functools.partial(tqdm, leave=False)`), + it is used to create the progress bar. + If `False`, no progress bar is created. lora_request: LoRA request to use for generation, if any. chat_template: The template to use for structuring the chat. If not provided, the model's default chat template will be used. @@ -823,7 +829,7 @@ class LLM: Sequence[PoolingParams]]] = None, *, truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[PoolingRequestOutput]: @@ -838,7 +844,7 @@ class LLM: Sequence[PoolingParams]]] = None, prompt_token_ids: Optional[list[int]] = None, truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[PoolingRequestOutput]: @@ -853,7 +859,7 @@ class LLM: Sequence[PoolingParams]]] = None, prompt_token_ids: Optional[list[list[int]]] = None, truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[PoolingRequestOutput]: @@ -869,7 +875,7 @@ class LLM: *, prompt_token_ids: list[int], truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[PoolingRequestOutput]: @@ -885,7 +891,7 @@ class LLM: *, prompt_token_ids: list[list[int]], truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[PoolingRequestOutput]: @@ -899,7 +905,7 @@ class LLM: pooling_params: None, prompt_token_ids: Union[list[int], list[list[int]]], truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[PoolingRequestOutput]: @@ -918,7 +924,7 @@ class LLM: Sequence[PoolingParams]]] = None, prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None, truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[PoolingRequestOutput]: @@ -935,7 +941,10 @@ class LLM: for more details about the format of each prompts. pooling_params: The pooling parameters for pooling. If None, we use the default pooling parameters. - use_tqdm: Whether to use tqdm to display the progress bar. + use_tqdm: If `True`, shows a tqdm progress bar. + If a callable (e.g., `functools.partial(tqdm, leave=False)`), + it is used to create the progress bar. + If `False`, no progress bar is created. lora_request: LoRA request to use for generation, if any. prompt_adapter_request: Prompt Adapter request to use for generation, if any. @@ -1005,7 +1014,7 @@ class LLM: /, *, truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, pooling_params: Optional[Union[PoolingParams, Sequence[PoolingParams]]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, @@ -1024,7 +1033,10 @@ class LLM: for more details about the format of each prompts. pooling_params: The pooling parameters for pooling. If None, we use the default pooling parameters. - use_tqdm: Whether to use tqdm to display the progress bar. + use_tqdm: If `True`, shows a tqdm progress bar. + If a callable (e.g., `functools.partial(tqdm, leave=False)`), + it is used to create the progress bar. + If `False`, no progress bar is created. lora_request: LoRA request to use for generation, if any. prompt_adapter_request: Prompt Adapter request to use for generation, if any. @@ -1051,7 +1063,7 @@ class LLM: prompts: Union[PromptType, Sequence[PromptType]], /, *, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[ClassificationRequestOutput]: @@ -1066,7 +1078,10 @@ class LLM: prompts: The prompts to the LLM. You may pass a sequence of prompts for batch inference. See [PromptType][vllm.inputs.PromptType] for more details about the format of each prompts. - use_tqdm: Whether to use tqdm to display the progress bar. + use_tqdm: If `True`, shows a tqdm progress bar. + If a callable (e.g., `functools.partial(tqdm, leave=False)`), + it is used to create the progress bar. + If `False`, no progress bar is created. lora_request: LoRA request to use for generation, if any. prompt_adapter_request: Prompt Adapter request to use for generation, if any. @@ -1092,7 +1107,7 @@ class LLM: text_1: list[Union[str, TextPrompt, TokensPrompt]], text_2: list[Union[str, TextPrompt, TokensPrompt]], truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[ScoringRequestOutput]: @@ -1126,7 +1141,7 @@ class LLM: text_1: list[str], text_2: list[str], truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[ScoringRequestOutput]: @@ -1178,7 +1193,7 @@ class LLM: /, *, truncate_prompt_tokens: Optional[int] = None, - use_tqdm: bool = True, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None, ) -> list[ScoringRequestOutput]: @@ -1198,7 +1213,10 @@ class LLM: text_2: The texts to pair with the query to form the input to the LLM. See [PromptType][vllm.inputs.PromptType] for more details about the format of each prompts. - use_tqdm: Whether to use tqdm to display the progress bar. + use_tqdm: If `True`, shows a tqdm progress bar. + If a callable (e.g., `functools.partial(tqdm, leave=False)`), + it is used to create the progress bar. + If `False`, no progress bar is created. lora_request: LoRA request to use for generation, if any. prompt_adapter_request: Prompt Adapter request to use for generation, if any. @@ -1379,7 +1397,7 @@ class LLM: params: Union[SamplingParams, Sequence[SamplingParams], PoolingParams, Sequence[PoolingParams]], *, - use_tqdm: bool, + use_tqdm: Union[bool, Callable[..., tqdm]] = True, lora_request: Optional[Union[Sequence[LoRARequest], LoRARequest]], prompt_adapter_request: Optional[PromptAdapterRequest], tokenization_kwargs: Optional[dict[str, Any]] = None, @@ -1417,7 +1435,8 @@ class LLM: # Add requests to the engine. it = prompts if use_tqdm: - it = tqdm(it, desc="Adding requests") + tqdm_func = use_tqdm if callable(use_tqdm) else tqdm + it = tqdm_func(it, desc="Adding requests") for i, prompt in enumerate(it): self._add_request( @@ -1474,12 +1493,15 @@ class LLM: return params def _run_engine( - self, *, use_tqdm: bool + self, + *, + use_tqdm: Union[bool, Callable[..., tqdm]] = True ) -> list[Union[RequestOutput, PoolingRequestOutput]]: # Initialize tqdm. if use_tqdm: num_requests = self.llm_engine.get_num_unfinished_requests() - pbar = tqdm( + tqdm_func = use_tqdm if callable(use_tqdm) else tqdm + pbar = tqdm_func( total=num_requests, desc="Processed prompts", dynamic_ncols=True,