diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 42afaeac0e8ee..399d44eb0f9fa 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -253,7 +253,7 @@
 export https_proxy=http://your.proxy.server:port
 https_proxy=http://your.proxy.server:port huggingface-cli download
 # or use vllm cmd directly
-https_proxy=http://your.proxy.server:port vllm serve --disable-log-requests
+https_proxy=http://your.proxy.server:port vllm serve
 ```
 
 - Set the proxy in Python interpreter:
diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py
index 5a91758414a5a..3dff86ed659bf 100644
--- a/tests/engine/test_arg_utils.py
+++ b/tests/engine/test_arg_utils.py
@@ -290,6 +290,22 @@ def test_prefix_cache_default():
     assert not engine_args.enable_prefix_caching
 
 
+def test_log_request_default_and_legacy_flag(caplog_vllm):
+    parser = AsyncEngineArgs.add_cli_args(FlexibleArgumentParser())
+
+    args = parser.parse_args([])
+    caplog_vllm.clear()
+    engine_args = AsyncEngineArgs.from_cli_args(args=args)
+    assert engine_args.disable_log_requests
+    assert "--disable-log-requests" in caplog_vllm.text
+
+    caplog_vllm.clear()
+    args = parser.parse_args(["--enable-legacy-log-requests"])
+    engine_args = AsyncEngineArgs.from_cli_args(args=args)
+    assert not engine_args.disable_log_requests
+    assert caplog_vllm.text == ""
+
+
 # yapf: disable
 @pytest.mark.parametrize(("arg", "expected", "option"), [
     (None, None, "mm-processor-kwargs"),
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index ae5eb46fa9677..ef555dfd74f05 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -881,6 +881,17 @@ class EngineArgs:
         attrs = [attr.name for attr in dataclasses.fields(cls)]
         # Set the attributes from the parsed arguments.
         engine_args = cls(**{attr: getattr(args, attr) for attr in attrs})
+        if isinstance(engine_args, AsyncEngineArgs):
+            if getattr(args, 'disable_log_requests', False):
+                engine_args.disable_log_requests = True
+            elif getattr(args, 'enable_legacy_log_requests', False):
+                engine_args.disable_log_requests = False
+            else:
+                engine_args.disable_log_requests = True
+                logger.warning(
+                    "'--disable-log-requests' is now enabled by default. "
+                    "Use --enable-legacy-log-requests to restore the previous "
+                    "request logging behavior.")
         return engine_args
 
     def create_model_config(self) -> ModelConfig:
@@ -1701,7 +1712,8 @@ class EngineArgs:
 @dataclass
 class AsyncEngineArgs(EngineArgs):
     """Arguments for asynchronous vLLM engine."""
-    disable_log_requests: bool = False
+    disable_log_requests: bool = True
+    enable_legacy_log_requests: bool = False
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser,
@@ -1715,6 +1727,9 @@ class AsyncEngineArgs(EngineArgs):
         parser.add_argument('--disable-log-requests',
                             action='store_true',
                             help='Disable logging requests.')
+        parser.add_argument('--enable-legacy-log-requests',
+                            action='store_true',
+                            help='Enable legacy request logging behavior.')
         current_platform.pre_register_and_update(parser)
 
         return parser
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index c2185acbf0c02..10b6c5377fbc6 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -1448,13 +1448,14 @@ async def init_app_state(
     vllm_config: VllmConfig,
     state: State,
     args: Namespace,
+    engine_args: AsyncEngineArgs,
 ) -> None:
     if args.served_model_name is not None:
         served_model_names = args.served_model_name
     else:
         served_model_names = [args.model]
 
-    if args.disable_log_requests:
+    if engine_args.disable_log_requests:
         request_logger = None
     else:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
@@ -1704,12 +1705,16 @@ async def run_server_worker(listen_address,
     if log_config is not None:
         uvicorn_kwargs['log_config'] = log_config
 
-    async with build_async_engine_client(args, client_config) as engine_client:
+    engine_args = AsyncEngineArgs.from_cli_args(args)
+    async with build_async_engine_client_from_engine_args(
+            engine_args, args.disable_frontend_multiprocessing,
+            client_config) as engine_client:
         maybe_register_tokenizer_info_endpoint(args)
         app = build_app(args)
 
         vllm_config = await engine_client.get_vllm_config()
-        await init_app_state(engine_client, vllm_config, app.state, args)
+        await init_app_state(engine_client, vllm_config, app.state, args,
+                             engine_args)
 
         logger.info("Starting vLLM API server %d on %s", server_index,
                     listen_address)
diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py
index 3dc5826909a02..74fc2538b56e7 100644
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -326,7 +326,7 @@ async def main(args):
         for name in served_model_names
     ]
 
-    if args.disable_log_requests:
+    if engine_args.disable_log_requests:
         request_logger = None
     else:
         request_logger = RequestLogger(max_log_len=args.max_log_len)