add option to shorten prompt print in log (#991)

Signed-off-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
leiwen83 2023-09-13 06:10:14 +08:00 committed by GitHub
parent 90eb3f43ca
commit d6545ad22e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 2 deletions

View File

@@ -171,6 +171,7 @@ class AsyncEngineArgs(EngineArgs):
     """Arguments for asynchronous vLLM engine."""
     engine_use_ray: bool = False
     disable_log_requests: bool = False
+    max_log_len: Optional[int] = None

     @staticmethod
     def add_cli_args(
@@ -183,4 +184,10 @@ class AsyncEngineArgs(EngineArgs):
         parser.add_argument('--disable-log-requests',
                             action='store_true',
                             help='disable logging requests')
+        parser.add_argument('--max-log-len',
+                            type=int,
+                            default=None,
+                            help='max number of prompt characters or prompt '
+                            'ID numbers being printed in log. '
+                            'Default: unlimited.')
         return parser

View File

@@ -242,11 +242,13 @@ class AsyncLLMEngine:
                  engine_use_ray: bool,
                  *args,
                  log_requests: bool = True,
+                 max_log_len: Optional[int] = None,
                  start_engine_loop: bool = True,
                  **kwargs) -> None:
         self.worker_use_ray = worker_use_ray
         self.engine_use_ray = engine_use_ray
         self.log_requests = log_requests
+        self.max_log_len = max_log_len
         self.engine = self._init_engine(*args, **kwargs)
         self.request_tracker: RequestTracker = RequestTracker()
@@ -325,10 +327,18 @@ class AsyncLLMEngine:
         arrival_time: Optional[float] = None,
     ) -> AsyncStream:
         if self.log_requests:
+            shortened_prompt = prompt
+            shortened_token_ids = prompt_token_ids
+            if self.max_log_len is not None:
+                if shortened_prompt is not None:
+                    shortened_prompt = shortened_prompt[:self.max_log_len]
+                if shortened_token_ids is not None:
+                    shortened_token_ids = shortened_token_ids[:self.
+                                                              max_log_len]
             logger.info(f"Received request {request_id}: "
-                        f"prompt: {prompt!r}, "
+                        f"prompt: {shortened_prompt!r}, "
                         f"sampling params: {sampling_params}, "
-                        f"prompt token ids: {prompt_token_ids}.")
+                        f"prompt token ids: {shortened_token_ids}.")

         if not self.is_running:
             if self.start_engine_loop:
@@ -446,5 +456,6 @@ class AsyncLLMEngine:
             placement_group,
             log_requests=not engine_args.disable_log_requests,
             log_stats=not engine_args.disable_log_stats,
+            max_log_len=engine_args.max_log_len,
             start_engine_loop=start_engine_loop)
         return engine