mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 23:45:54 +08:00
Add option to shorten the prompt printed in the log (#991)
Signed-off-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
parent
90eb3f43ca
commit
d6545ad22e
@ -171,6 +171,7 @@ class AsyncEngineArgs(EngineArgs):
|
|||||||
"""Arguments for asynchronous vLLM engine."""
|
"""Arguments for asynchronous vLLM engine."""
|
||||||
engine_use_ray: bool = False
|
engine_use_ray: bool = False
|
||||||
disable_log_requests: bool = False
|
disable_log_requests: bool = False
|
||||||
|
max_log_len: Optional[int] = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_cli_args(
|
def add_cli_args(
|
||||||
@ -183,4 +184,10 @@ class AsyncEngineArgs(EngineArgs):
|
|||||||
parser.add_argument('--disable-log-requests',
|
parser.add_argument('--disable-log-requests',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='disable logging requests')
|
help='disable logging requests')
|
||||||
|
parser.add_argument('--max-log-len',
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help='max number of prompt characters or prompt '
|
||||||
|
'ID numbers being printed in log. '
|
||||||
|
'Default: unlimited.')
|
||||||
return parser
|
return parser
|
||||||
|
|||||||
@ -242,11 +242,13 @@ class AsyncLLMEngine:
|
|||||||
engine_use_ray: bool,
|
engine_use_ray: bool,
|
||||||
*args,
|
*args,
|
||||||
log_requests: bool = True,
|
log_requests: bool = True,
|
||||||
|
max_log_len: Optional[int] = None,
|
||||||
start_engine_loop: bool = True,
|
start_engine_loop: bool = True,
|
||||||
**kwargs) -> None:
|
**kwargs) -> None:
|
||||||
self.worker_use_ray = worker_use_ray
|
self.worker_use_ray = worker_use_ray
|
||||||
self.engine_use_ray = engine_use_ray
|
self.engine_use_ray = engine_use_ray
|
||||||
self.log_requests = log_requests
|
self.log_requests = log_requests
|
||||||
|
self.max_log_len = max_log_len
|
||||||
self.engine = self._init_engine(*args, **kwargs)
|
self.engine = self._init_engine(*args, **kwargs)
|
||||||
|
|
||||||
self.request_tracker: RequestTracker = RequestTracker()
|
self.request_tracker: RequestTracker = RequestTracker()
|
||||||
@ -325,10 +327,18 @@ class AsyncLLMEngine:
|
|||||||
arrival_time: Optional[float] = None,
|
arrival_time: Optional[float] = None,
|
||||||
) -> AsyncStream:
|
) -> AsyncStream:
|
||||||
if self.log_requests:
|
if self.log_requests:
|
||||||
|
shortened_prompt = prompt
|
||||||
|
shortened_token_ids = prompt_token_ids
|
||||||
|
if self.max_log_len is not None:
|
||||||
|
if shortened_prompt is not None:
|
||||||
|
shortened_prompt = shortened_prompt[:self.max_log_len]
|
||||||
|
if shortened_token_ids is not None:
|
||||||
|
shortened_token_ids = shortened_token_ids[:self.
|
||||||
|
max_log_len]
|
||||||
logger.info(f"Received request {request_id}: "
|
logger.info(f"Received request {request_id}: "
|
||||||
f"prompt: {prompt!r}, "
|
f"prompt: {shortened_prompt!r}, "
|
||||||
f"sampling params: {sampling_params}, "
|
f"sampling params: {sampling_params}, "
|
||||||
f"prompt token ids: {prompt_token_ids}.")
|
f"prompt token ids: {shortened_token_ids}.")
|
||||||
|
|
||||||
if not self.is_running:
|
if not self.is_running:
|
||||||
if self.start_engine_loop:
|
if self.start_engine_loop:
|
||||||
@ -446,5 +456,6 @@ class AsyncLLMEngine:
|
|||||||
placement_group,
|
placement_group,
|
||||||
log_requests=not engine_args.disable_log_requests,
|
log_requests=not engine_args.disable_log_requests,
|
||||||
log_stats=not engine_args.disable_log_stats,
|
log_stats=not engine_args.disable_log_stats,
|
||||||
|
max_log_len=engine_args.max_log_len,
|
||||||
start_engine_loop=start_engine_loop)
|
start_engine_loop=start_engine_loop)
|
||||||
return engine
|
return engine
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user