diff --git a/.buildkite/nightly-benchmarks/README.md b/.buildkite/nightly-benchmarks/README.md
index fcde284efea9..3721d3d1d674 100644
--- a/.buildkite/nightly-benchmarks/README.md
+++ b/.buildkite/nightly-benchmarks/README.md
@@ -104,7 +104,6 @@ We test the throughput by using `vllm bench serve` with request rate = inf to co
         "tensor_parallel_size": 1,
         "swap_space": 16,
         "disable_log_stats": "",
-        "disable_log_requests": "",
         "load_format": "dummy"
     },
     "client_parameters": {
diff --git a/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json b/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
index edbe9f2df0ce..f26ae7634f3d 100644
--- a/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
@@ -11,7 +11,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
diff --git a/.buildkite/nightly-benchmarks/tests/nightly-tests.json b/.buildkite/nightly-benchmarks/tests/nightly-tests.json
index fda1a7a3ec53..41b4a4008801 100644
--- a/.buildkite/nightly-benchmarks/tests/nightly-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/nightly-tests.json
@@ -35,7 +35,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -90,7 +89,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -145,7 +143,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -197,7 +194,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -251,7 +247,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -305,7 +300,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
index a144b4420fbf..dd0e24edff98 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
@@ -17,7 +17,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -50,7 +49,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -83,7 +81,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -117,7 +114,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -153,7 +149,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -189,7 +184,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
index e6e69b63b74d..f1bda65a7590 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
@@ -17,7 +17,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -50,7 +49,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -84,7 +82,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -118,7 +115,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -154,7 +150,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -191,7 +186,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
index ce1f924de387..f150b9abeea4 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
@@ -17,7 +17,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -50,7 +49,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -83,7 +81,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -117,7 +114,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -153,7 +149,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests.json b/.buildkite/nightly-benchmarks/tests/serving-tests.json
index 13fd5aa8db97..a6d4141d5c2d 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests.json
@@ -7,7 +7,6 @@
             "tensor_parallel_size": 1,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -26,7 +25,6 @@
             "tensor_parallel_size": 4,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -45,7 +43,6 @@
             "tensor_parallel_size": 2,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -60,8 +57,7 @@
         "test_name": "serving_llama70B_tp4_sharegpt_specdecode",
         "qps_list": [2],
         "server_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-            "disable_log_requests": "",
+            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
             "tensor_parallel_size": 4,
             "swap_space": 16,
             "speculative_config": {
diff --git a/tests/config/test_mp_reducer.py b/tests/config/test_mp_reducer.py
index ee351cbfa7c1..d4d4be293280 100644
--- a/tests/config/test_mp_reducer.py
+++ b/tests/config/test_mp_reducer.py
@@ -28,7 +28,6 @@ def test_mp_reducer(monkeypatch):
         max_model_len=32,
         gpu_memory_utilization=0.1,
         disable_log_stats=True,
-        disable_log_requests=True,
     )
 
     async_llm = AsyncLLM.from_engine_args(
diff --git a/tests/mq_llm_engine/test_load.py b/tests/mq_llm_engine/test_load.py
index e9fd5b814f28..c934706611ae 100644
--- a/tests/mq_llm_engine/test_load.py
+++ b/tests/mq_llm_engine/test_load.py
@@ -16,7 +16,7 @@
 NUM_EXPECTED_TOKENS = 10
 NUM_REQUESTS = 10000
 # Scenarios to test for num generated token.
-ENGINE_ARGS = AsyncEngineArgs(model=MODEL, disable_log_requests=True)
+ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
 
 
 @pytest.fixture(scope="function")
diff --git a/tests/v1/engine/test_async_llm.py b/tests/v1/engine/test_async_llm.py
index 412df3acff12..21694491dd73 100644
--- a/tests/v1/engine/test_async_llm.py
+++ b/tests/v1/engine/test_async_llm.py
@@ -26,12 +26,10 @@ if not current_platform.is_cuda():
 
 TEXT_ENGINE_ARGS = AsyncEngineArgs(
     model="meta-llama/Llama-3.2-1B-Instruct",
     enforce_eager=True,
-    disable_log_requests=True,
 )
 
 VISION_ENGINE_ARGS = AsyncEngineArgs(model="Qwen/Qwen2-VL-2B-Instruct",
-                                     enforce_eager=True,
-                                     disable_log_requests=True)
+                                     enforce_eager=True)
 
 TEXT_PROMPT = "Hello my name is Robert and"
diff --git a/tests/v1/test_async_llm_dp.py b/tests/v1/test_async_llm_dp.py
index 6716d27f571f..c2610a87ac78 100644
--- a/tests/v1/test_async_llm_dp.py
+++ b/tests/v1/test_async_llm_dp.py
@@ -25,7 +25,6 @@ DP_SIZE = int(os.getenv("DP_SIZE", 2))
 engine_args = AsyncEngineArgs(
     model="ibm-research/PowerMoE-3b",
     enforce_eager=True,
-    disable_log_requests=True,
     tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
     data_parallel_size=DP_SIZE,
 )
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index f938f19b9046..0d38b5b5302c 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -18,7 +18,7 @@ from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
 import regex as re
 import torch
 from pydantic import TypeAdapter, ValidationError
-from typing_extensions import TypeIs
+from typing_extensions import TypeIs, deprecated
 
 import vllm.envs as envs
 from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
@@ -1704,7 +1704,23 @@ class EngineArgs:
 @dataclass
 class AsyncEngineArgs(EngineArgs):
     """Arguments for asynchronous vLLM engine."""
-    disable_log_requests: bool = False
+    enable_log_requests: bool = False
+
+    @property
+    @deprecated(
+        "`disable_log_requests` is deprecated and has been replaced with "
+        "`enable_log_requests`. This will be removed in v0.12.0. Please use "
+        "`enable_log_requests` instead.")
+    def disable_log_requests(self) -> bool:
+        return not self.enable_log_requests
+
+    @disable_log_requests.setter
+    @deprecated(
+        "`disable_log_requests` is deprecated and has been replaced with "
+        "`enable_log_requests`. This will be removed in v0.12.0. Please use "
+        "`enable_log_requests` instead.")
+    def disable_log_requests(self, value: bool):
+        self.enable_log_requests = not value
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser,
@@ -1715,9 +1731,15 @@ class AsyncEngineArgs(EngineArgs):
         load_general_plugins()
         if not async_args_only:
             parser = EngineArgs.add_cli_args(parser)
+        parser.add_argument('--enable-log-requests',
+                            action=argparse.BooleanOptionalAction,
+                            default=AsyncEngineArgs.enable_log_requests,
+                            help='Enable logging requests.')
         parser.add_argument('--disable-log-requests',
-                            action='store_true',
-                            help='Disable logging requests.')
+                            action=argparse.BooleanOptionalAction,
+                            default=not AsyncEngineArgs.enable_log_requests,
+                            help='[DEPRECATED] Disable logging requests.',
+                            deprecated=True)
         current_platform.pre_register_and_update(parser)
         return parser
 
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 06bb4eeab69e..1f962b008ee0 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -30,7 +30,7 @@ from vllm.sampling_params import SamplingParams
 from vllm.sequence import ExecuteModelRequest
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, weak_bind
+from vllm.utils import Device, deprecate_kwargs, weak_bind
 
 logger = init_logger(__name__)
 ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@@ -554,14 +554,20 @@ class AsyncLLMEngine(EngineClient):
         return LLMEngine._get_executor_cls(engine_config)
 
     @classmethod
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
     def from_vllm_config(
-            cls,
-            vllm_config: VllmConfig,
-            start_engine_loop: bool = True,
-            usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
-            stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
-            disable_log_requests: bool = False,
-            disable_log_stats: bool = False,
+        cls,
+        vllm_config: VllmConfig,
+        start_engine_loop: bool = True,
+        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
+        stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
+        enable_log_requests: bool = False,
+        disable_log_stats: bool = False,
+        disable_log_requests: bool = True,  # Deprecated, will be removed
     ) -> "AsyncLLMEngine":
         """Create an AsyncLLMEngine from the EngineArgs."""
 
@@ -569,7 +575,7 @@ class AsyncLLMEngine(EngineClient):
             vllm_config=vllm_config,
             executor_class=cls._get_executor_cls(vllm_config),
             start_engine_loop=start_engine_loop,
-            log_requests=not disable_log_requests,
+            log_requests=enable_log_requests,
             log_stats=not disable_log_stats,
             usage_context=usage_context,
             stat_loggers=stat_loggers,
@@ -598,7 +604,7 @@ class AsyncLLMEngine(EngineClient):
             usage_context=usage_context,
             stat_loggers=stat_loggers,
             disable_log_stats=engine_args.disable_log_stats,
-            disable_log_requests=engine_args.disable_log_requests,
+            enable_log_requests=engine_args.enable_log_requests,
         )
 
     @property
diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py
index fe6eb0d8c2f1..903f3fd71ebc 100644
--- a/vllm/engine/multiprocessing/engine.py
+++ b/vllm/engine/multiprocessing/engine.py
@@ -34,6 +34,7 @@ from vllm.outputs import RequestOutput
 from vllm.transformers_utils.config import (
     maybe_register_config_serialize_by_value)
 from vllm.usage.usage_lib import UsageContext
+from vllm.utils import deprecate_kwargs
 from vllm.worker.model_runner_base import InputProcessingError
 
 logger = init_logger(__name__)
@@ -120,10 +121,20 @@ class MQLLMEngine:
         return ENGINE_DEAD_ERROR()
 
     @classmethod
-    def from_vllm_config(cls, vllm_config: VllmConfig,
-                         usage_context: UsageContext,
-                         disable_log_requests: bool, disable_log_stats: bool,
-                         ipc_path: str) -> "MQLLMEngine":
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
+    def from_vllm_config(
+        cls,
+        vllm_config: VllmConfig,
+        usage_context: UsageContext,
+        enable_log_requests: bool,
+        disable_log_stats: bool,
+        ipc_path: str,
+        disable_log_requests: bool = True,  # Deprecated, will be removed
+    ) -> "MQLLMEngine":
         # Setup plugins for each process
         from vllm.plugins import load_general_plugins
         load_general_plugins()
@@ -136,7 +147,7 @@ class MQLLMEngine:
             ipc_path=ipc_path,
             usage_context=usage_context,
             use_async_sockets=use_async_sockets,
-            log_requests=(not disable_log_requests),
+            log_requests=enable_log_requests,
             log_stats=(not disable_log_stats),
         )
 
@@ -150,7 +161,7 @@ class MQLLMEngine:
             ipc_path=ipc_path,
             vllm_config=vllm_config,
             usage_context=usage_context,
-            disable_log_requests=engine_args.disable_log_requests,
+            enable_log_requests=engine_args.enable_log_requests,
             disable_log_stats=engine_args.disable_log_stats,
         )
 
@@ -436,7 +447,7 @@ def signal_handler(*_) -> None:
 
 def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
                   ipc_path: str, disable_log_stats: bool,
-                  disable_log_requests: bool, engine_alive):
+                  enable_log_requests: bool, engine_alive):
     try:
         # Ensure we can serialize transformer config before spawning
         maybe_register_config_serialize_by_value()
@@ -445,7 +456,7 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
             vllm_config=vllm_config,
             usage_context=usage_context,
             disable_log_stats=disable_log_stats,
-            disable_log_requests=disable_log_requests,
+            enable_log_requests=enable_log_requests,
             ipc_path=ipc_path)
 
         signal.signal(signal.SIGTERM, signal_handler)
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 1be03c57a1f1..b8ec5461f771 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -205,7 +205,7 @@
             async_llm = AsyncLLM.from_vllm_config(
                 vllm_config=vllm_config,
                 usage_context=usage_context,
-                disable_log_requests=engine_args.disable_log_requests,
+                enable_log_requests=engine_args.enable_log_requests,
                 disable_log_stats=engine_args.disable_log_stats,
                 client_addresses=client_config,
                 client_index=client_index)
@@ -227,7 +227,7 @@
             engine_client = AsyncLLMEngine.from_vllm_config(
                 vllm_config=vllm_config,
                 usage_context=usage_context,
-                disable_log_requests=engine_args.disable_log_requests,
+                enable_log_requests=engine_args.enable_log_requests,
                 disable_log_stats=engine_args.disable_log_stats)
             yield engine_client
         finally:
@@ -272,7 +272,7 @@
             target=run_mp_engine,
             args=(vllm_config, UsageContext.OPENAI_API_SERVER, ipc_path,
                   engine_args.disable_log_stats,
-                  engine_args.disable_log_requests, engine_alive))
+                  engine_args.enable_log_requests, engine_alive))
         engine_process.start()
         engine_pid = engine_process.pid
         assert engine_pid is not None, "Engine process failed to start."
@@ -1570,10 +1570,10 @@ async def init_app_state(
     else:
         served_model_names = [args.model]
 
-    if args.disable_log_requests:
-        request_logger = None
-    else:
+    if args.enable_log_requests:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
+    else:
+        request_logger = None
 
     base_model_paths = [
         BaseModelPath(name=name, model_path=args.model)
diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py
index 137b368dad20..d146ad485d19 100644
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -324,10 +324,10 @@ async def run_batch(
     else:
         served_model_names = [args.model]
 
-    if args.disable_log_requests:
-        request_logger = None
-    else:
+    if args.enable_log_requests:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
+    else:
+        request_logger = None
 
     base_model_paths = [
         BaseModelPath(name=name, model_path=args.model)
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index d5d8d9dad73a..7405f3986df8 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -1668,6 +1668,12 @@ class FlexibleArgumentParser(ArgumentParser):
 
     # Enable the deprecated kwarg for Python 3.12 and below
     def parse_known_args(self, args=None, namespace=None):
+        if args is not None and "--disable-log-requests" in args:
+            # Special case warning because the warning below won't trigger
+            # if --disable-log-requests is used since its value is the default.
+            logger.warning_once(
+                "argument '--disable-log-requests' is deprecated. This "
+                "will be removed in v0.12.0.")
         namespace, args = super().parse_known_args(args, namespace)
         for action in FlexibleArgumentParser._deprecated:
             if (hasattr(namespace, dest := action.dest)
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index ed0d9620f476..308ca32105ba 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -27,7 +27,7 @@ from vllm.transformers_utils.config import (
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, cdiv
+from vllm.utils import Device, cdiv, deprecate_kwargs
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core_client import EngineCoreClient
 from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
@@ -142,16 +142,22 @@
         pass
 
     @classmethod
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
     def from_vllm_config(
-            cls,
-            vllm_config: VllmConfig,
-            start_engine_loop: bool = True,
-            usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
-            stat_loggers: Optional[list[StatLoggerFactory]] = None,
-            disable_log_requests: bool = False,
-            disable_log_stats: bool = False,
-            client_addresses: Optional[dict[str, str]] = None,
-            client_index: int = 0,
+        cls,
+        vllm_config: VllmConfig,
+        start_engine_loop: bool = True,
+        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
+        stat_loggers: Optional[list[StatLoggerFactory]] = None,
+        enable_log_requests: bool = False,
+        disable_log_stats: bool = False,
+        client_addresses: Optional[dict[str, str]] = None,
+        client_index: int = 0,
+        disable_log_requests: bool = True,  # Deprecated, will be removed
     ) -> "AsyncLLM":
         if not envs.VLLM_USE_V1:
             raise ValueError(
@@ -166,7 +172,7 @@
             executor_class=Executor.get_class(vllm_config),
             start_engine_loop=start_engine_loop,
             stat_loggers=stat_loggers,
-            log_requests=not disable_log_requests,
+            log_requests=enable_log_requests,
             log_stats=not disable_log_stats,
             usage_context=usage_context,
             client_addresses=client_addresses,
@@ -191,7 +197,7 @@
         return cls(
             vllm_config=vllm_config,
             executor_class=executor_class,
-            log_requests=not engine_args.disable_log_requests,
+            log_requests=engine_args.enable_log_requests,
             log_stats=not engine_args.disable_log_stats,
             start_engine_loop=start_engine_loop,
             usage_context=usage_context,