From d6545ad22ee89c8b6e3eb6cfcf8ff914a06ccee1 Mon Sep 17 00:00:00 2001 From: leiwen83 Date: Wed, 13 Sep 2023 06:10:14 +0800 Subject: [PATCH] add option to shorten prompt print in log (#991) Signed-off-by: Lei Wen Co-authored-by: Lei Wen Co-authored-by: Zhuohan Li --- vllm/engine/arg_utils.py | 7 +++++++ vllm/engine/async_llm_engine.py | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index b775ec089dcf..7679966c512e 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -171,6 +171,7 @@ class AsyncEngineArgs(EngineArgs): """Arguments for asynchronous vLLM engine.""" engine_use_ray: bool = False disable_log_requests: bool = False + max_log_len: Optional[int] = None @staticmethod def add_cli_args( @@ -183,4 +184,10 @@ class AsyncEngineArgs(EngineArgs): parser.add_argument('--disable-log-requests', action='store_true', help='disable logging requests') + parser.add_argument('--max-log-len', + type=int, + default=None, + help='max number of prompt characters or prompt ' 'ID numbers being printed in log. 
' + 'Default: unlimited.') return parser diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 1a228c824924..1a52f0262d55 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -242,11 +242,13 @@ class AsyncLLMEngine: engine_use_ray: bool, *args, log_requests: bool = True, + max_log_len: Optional[int] = None, start_engine_loop: bool = True, **kwargs) -> None: self.worker_use_ray = worker_use_ray self.engine_use_ray = engine_use_ray self.log_requests = log_requests + self.max_log_len = max_log_len self.engine = self._init_engine(*args, **kwargs) self.request_tracker: RequestTracker = RequestTracker() @@ -325,10 +327,18 @@ class AsyncLLMEngine: arrival_time: Optional[float] = None, ) -> AsyncStream: if self.log_requests: + shortened_prompt = prompt + shortened_token_ids = prompt_token_ids + if self.max_log_len is not None: + if shortened_prompt is not None: + shortened_prompt = shortened_prompt[:self.max_log_len] + if shortened_token_ids is not None: + shortened_token_ids = shortened_token_ids[:self. + max_log_len] logger.info(f"Received request {request_id}: " - f"prompt: {prompt!r}, " + f"prompt: {shortened_prompt!r}, " f"sampling params: {sampling_params}, " - f"prompt token ids: {prompt_token_ids}.") + f"prompt token ids: {shortened_token_ids}.") if not self.is_running: if self.start_engine_loop: @@ -446,5 +456,6 @@ class AsyncLLMEngine: placement_group, log_requests=not engine_args.disable_log_requests, log_stats=not engine_args.disable_log_stats, + max_log_len=engine_args.max_log_len, start_engine_loop=start_engine_loop) return engine