Deprecate --disable-log-requests and replace with --enable-log-requests (#21739)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Harry Mellor 2025-08-01 17:16:37 +01:00 committed by GitHub
parent 97608dc276
commit 2d7b09b998
18 changed files with 97 additions and 79 deletions

View File

@@ -104,7 +104,6 @@ We test the throughput by using `vllm bench serve` with request rate = inf to co
"tensor_parallel_size": 1,
"swap_space": 16,
"disable_log_stats": "",
-"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {

View File

@@ -11,7 +11,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
-"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,

View File

@@ -35,7 +35,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
-"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
@@ -90,7 +89,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
-"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
@@ -145,7 +143,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
-"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
@@ -197,7 +194,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
-"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
@@ -251,7 +247,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
-"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
@@ -305,7 +300,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
-"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,

View File

@@ -17,7 +17,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -50,7 +49,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -83,7 +81,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -117,7 +114,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -153,7 +149,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -189,7 +184,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,

View File

@@ -17,7 +17,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -50,7 +49,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -84,7 +82,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -118,7 +115,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -154,7 +150,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -191,7 +186,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,

View File

@@ -17,7 +17,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -50,7 +49,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -83,7 +81,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -117,7 +114,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
@@ -153,7 +149,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
-"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,

View File

@@ -7,7 +7,6 @@
"tensor_parallel_size": 1,
"swap_space": 16,
"disable_log_stats": "",
-"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {
@@ -26,7 +25,6 @@
"tensor_parallel_size": 4,
"swap_space": 16,
"disable_log_stats": "",
-"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {
@@ -45,7 +43,6 @@
"tensor_parallel_size": 2,
"swap_space": 16,
"disable_log_stats": "",
-"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {
@@ -60,8 +57,7 @@
"test_name": "serving_llama70B_tp4_sharegpt_specdecode",
"qps_list": [2],
"server_parameters": {
-"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-"disable_log_requests": "",
+"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"tensor_parallel_size": 4,
"swap_space": 16,
"speculative_config": {

View File

@@ -28,7 +28,6 @@ def test_mp_reducer(monkeypatch):
max_model_len=32,
gpu_memory_utilization=0.1,
disable_log_stats=True,
-disable_log_requests=True,
)
async_llm = AsyncLLM.from_engine_args(

View File

@@ -16,7 +16,7 @@ NUM_EXPECTED_TOKENS = 10
NUM_REQUESTS = 10000
# Scenarios to test for num generated token.
-ENGINE_ARGS = AsyncEngineArgs(model=MODEL, disable_log_requests=True)
+ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
@pytest.fixture(scope="function")

View File

@@ -26,12 +26,10 @@ if not current_platform.is_cuda():
TEXT_ENGINE_ARGS = AsyncEngineArgs(
model="meta-llama/Llama-3.2-1B-Instruct",
enforce_eager=True,
-disable_log_requests=True,
)
VISION_ENGINE_ARGS = AsyncEngineArgs(model="Qwen/Qwen2-VL-2B-Instruct",
-enforce_eager=True,
-disable_log_requests=True)
+enforce_eager=True)
TEXT_PROMPT = "Hello my name is Robert and"

View File

@@ -25,7 +25,6 @@ DP_SIZE = int(os.getenv("DP_SIZE", 2))
engine_args = AsyncEngineArgs(
model="ibm-research/PowerMoE-3b",
enforce_eager=True,
-disable_log_requests=True,
tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
data_parallel_size=DP_SIZE,
)

View File

@@ -18,7 +18,7 @@ from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
import regex as re
import torch
from pydantic import TypeAdapter, ValidationError
-from typing_extensions import TypeIs
+from typing_extensions import TypeIs, deprecated
import vllm.envs as envs
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
@@ -1704,7 +1704,23 @@ class EngineArgs:
@dataclass
class AsyncEngineArgs(EngineArgs):
"""Arguments for asynchronous vLLM engine."""
-disable_log_requests: bool = False
+enable_log_requests: bool = False
+@property
+@deprecated(
+"`disable_log_requests` is deprecated and has been replaced with "
+"`enable_log_requests`. This will be removed in v0.12.0. Please use "
+"`enable_log_requests` instead.")
+def disable_log_requests(self) -> bool:
+return not self.enable_log_requests
+@disable_log_requests.setter
+@deprecated(
+"`disable_log_requests` is deprecated and has been replaced with "
+"`enable_log_requests`. This will be removed in v0.12.0. Please use "
+"`enable_log_requests` instead.")
+def disable_log_requests(self, value: bool):
+self.enable_log_requests = not value
@staticmethod
def add_cli_args(parser: FlexibleArgumentParser,
@@ -1715,9 +1731,15 @@
load_general_plugins()
if not async_args_only:
parser = EngineArgs.add_cli_args(parser)
+parser.add_argument('--enable-log-requests',
+action=argparse.BooleanOptionalAction,
+default=AsyncEngineArgs.enable_log_requests,
+help='Enable logging requests.')
parser.add_argument('--disable-log-requests',
-action='store_true',
-help='Disable logging requests.')
+action=argparse.BooleanOptionalAction,
+default=not AsyncEngineArgs.enable_log_requests,
+help='[DEPRECATED] Disable logging requests.',
+deprecated=True)
current_platform.pre_register_and_update(parser)
return parser
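
Two deprecation mechanisms appear in this file. First, since disable_log_requests is no longer a dataclass field, a read/write property keeps the old attribute working for one release while warning through typing_extensions.deprecated (PEP 702). Second, the CLI keeps --disable-log-requests registered, switched to argparse.BooleanOptionalAction (which also generates a --no-... variant, Python 3.9+) and marked deprecated=True; per the FlexibleArgumentParser comment further down, that kwarg is native only in Python 3.13+ and is backported for older versions. A standalone sketch of the shadow-property pattern, with names mirroring the diff but outside vLLM:

    from dataclasses import dataclass

    from typing_extensions import deprecated


    @dataclass
    class Args:
        enable_log_requests: bool = False

        @property
        @deprecated("`disable_log_requests` is deprecated; "
                    "use `enable_log_requests` instead.")
        def disable_log_requests(self) -> bool:
            # Old getter semantics: the inverse of the new field.
            return not self.enable_log_requests

        @disable_log_requests.setter
        @deprecated("`disable_log_requests` is deprecated; "
                    "use `enable_log_requests` instead.")
        def disable_log_requests(self, value: bool) -> None:
            # Old setter semantics: writes through to the new field.
            self.enable_log_requests = not value


    args = Args()
    args.disable_log_requests = False  # emits DeprecationWarning
    assert args.enable_log_requests is True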

View File

@@ -30,7 +30,7 @@ from vllm.sampling_params import SamplingParams
from vllm.sequence import ExecuteModelRequest
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, weak_bind
+from vllm.utils import Device, deprecate_kwargs, weak_bind
logger = init_logger(__name__)
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@@ -554,14 +554,20 @@
return LLMEngine._get_executor_cls(engine_config)
@classmethod
+@deprecate_kwargs(
+"disable_log_requests",
+additional_message=("This argument will have no effect. "
+"Use `enable_log_requests` instead."),
+)
def from_vllm_config(
-cls,
-vllm_config: VllmConfig,
-start_engine_loop: bool = True,
-usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
-stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
-disable_log_requests: bool = False,
-disable_log_stats: bool = False,
+cls,
+vllm_config: VllmConfig,
+start_engine_loop: bool = True,
+usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
+stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
+enable_log_requests: bool = False,
+disable_log_stats: bool = False,
+disable_log_requests: bool = True, # Deprecated, will be removed
) -> "AsyncLLMEngine":
"""Create an AsyncLLMEngine from the EngineArgs."""
@@ -569,7 +575,7 @@
vllm_config=vllm_config,
executor_class=cls._get_executor_cls(vllm_config),
start_engine_loop=start_engine_loop,
-log_requests=not disable_log_requests,
+log_requests=enable_log_requests,
log_stats=not disable_log_stats,
usage_context=usage_context,
stat_loggers=stat_loggers,
@@ -598,7 +604,7 @@
usage_context=usage_context,
stat_loggers=stat_loggers,
disable_log_stats=engine_args.disable_log_stats,
-disable_log_requests=engine_args.disable_log_requests,
+enable_log_requests=engine_args.enable_log_requests,
)
@property
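
On the engine side the old keyword is kept in the signature (defaulting to True and otherwise unused) so that existing callers don't break, while deprecate_kwargs warns whenever it is passed explicitly. The implementation of vllm.utils.deprecate_kwargs is not part of this diff; the following is a hedged sketch of what such a decorator typically looks like, not the vLLM source:

    import functools
    import warnings
    from typing import Any, Callable


    def deprecate_kwargs(*names: str, additional_message: str = "") -> Callable:
        """Warn when any of the named keyword arguments is passed."""
        def wrap(fn: Callable) -> Callable:
            @functools.wraps(fn)
            def inner(*args: Any, **kwargs: Any) -> Any:
                for name in names:
                    if name in kwargs:
                        warnings.warn(
                            f"The keyword argument '{name}' is deprecated. " +
                            additional_message,
                            DeprecationWarning,
                            stacklevel=2)
                return fn(*args, **kwargs)
            return inner
        return wrap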

View File

@@ -34,6 +34,7 @@ from vllm.outputs import RequestOutput
from vllm.transformers_utils.config import (
maybe_register_config_serialize_by_value)
from vllm.usage.usage_lib import UsageContext
+from vllm.utils import deprecate_kwargs
from vllm.worker.model_runner_base import InputProcessingError
logger = init_logger(__name__)
@@ -120,10 +121,20 @@
return ENGINE_DEAD_ERROR()
@classmethod
-def from_vllm_config(cls, vllm_config: VllmConfig,
-usage_context: UsageContext,
-disable_log_requests: bool, disable_log_stats: bool,
-ipc_path: str) -> "MQLLMEngine":
+@deprecate_kwargs(
+"disable_log_requests",
+additional_message=("This argument will have no effect. "
+"Use `enable_log_requests` instead."),
+)
+def from_vllm_config(
+cls,
+vllm_config: VllmConfig,
+usage_context: UsageContext,
+enable_log_requests: bool,
+disable_log_stats: bool,
+ipc_path: str,
+disable_log_requests: bool = True, # Deprecated, will be removed
+) -> "MQLLMEngine":
# Setup plugins for each process
from vllm.plugins import load_general_plugins
load_general_plugins()
@@ -136,7 +147,7 @@
ipc_path=ipc_path,
usage_context=usage_context,
use_async_sockets=use_async_sockets,
-log_requests=(not disable_log_requests),
+log_requests=enable_log_requests,
log_stats=(not disable_log_stats),
)
@@ -150,7 +161,7 @@
ipc_path=ipc_path,
vllm_config=vllm_config,
usage_context=usage_context,
-disable_log_requests=engine_args.disable_log_requests,
+enable_log_requests=engine_args.enable_log_requests,
disable_log_stats=engine_args.disable_log_stats,
)
@@ -436,7 +447,7 @@ def signal_handler(*_) -> None:
def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
ipc_path: str, disable_log_stats: bool,
-disable_log_requests: bool, engine_alive):
+enable_log_requests: bool, engine_alive):
try:
# Ensure we can serialize transformer config before spawning
maybe_register_config_serialize_by_value()
@@ -445,7 +456,7 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
vllm_config=vllm_config,
usage_context=usage_context,
disable_log_stats=disable_log_stats,
-disable_log_requests=disable_log_requests,
+enable_log_requests=enable_log_requests,
ipc_path=ipc_path)
signal.signal(signal.SIGTERM, signal_handler)

View File

@@ -205,7 +205,7 @@
async_llm = AsyncLLM.from_vllm_config(
vllm_config=vllm_config,
usage_context=usage_context,
-disable_log_requests=engine_args.disable_log_requests,
+enable_log_requests=engine_args.enable_log_requests,
disable_log_stats=engine_args.disable_log_stats,
client_addresses=client_config,
client_index=client_index)
@@ -227,7 +227,7 @@
engine_client = AsyncLLMEngine.from_vllm_config(
vllm_config=vllm_config,
usage_context=usage_context,
-disable_log_requests=engine_args.disable_log_requests,
+enable_log_requests=engine_args.enable_log_requests,
disable_log_stats=engine_args.disable_log_stats)
yield engine_client
finally:
@@ -272,7 +272,7 @@
target=run_mp_engine,
args=(vllm_config, UsageContext.OPENAI_API_SERVER, ipc_path,
engine_args.disable_log_stats,
-engine_args.disable_log_requests, engine_alive))
+engine_args.enable_log_requests, engine_alive))
engine_process.start()
engine_pid = engine_process.pid
assert engine_pid is not None, "Engine process failed to start."
@@ -1570,10 +1570,10 @@
else:
served_model_names = [args.model]
-if args.disable_log_requests:
-request_logger = None
-else:
+if args.enable_log_requests:
request_logger = RequestLogger(max_log_len=args.max_log_len)
+else:
+request_logger = None
base_model_paths = [
BaseModelPath(name=name, model_path=args.model)
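
Note the inverted conditional: previously a RequestLogger was built unless --disable-log-requests was passed; now it is built only when --enable-log-requests is set, so per-request logging defaults to off. On the command line the opt-in is `vllm serve <model> --enable-log-requests`. A minimal usage sketch, assuming a vLLM build containing this commit (the model name is only an example):

    from vllm.engine.arg_utils import AsyncEngineArgs

    # New spelling: request logging is opt-in.
    args = AsyncEngineArgs(model="facebook/opt-125m",
                           enable_log_requests=True)

    # The old attribute still resolves through the deprecated property,
    # emitting a DeprecationWarning until its removal in v0.12.0.
    assert args.disable_log_requests is False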

View File

@@ -324,10 +324,10 @@ async def run_batch(
else:
served_model_names = [args.model]
-if args.disable_log_requests:
-request_logger = None
-else:
+if args.enable_log_requests:
request_logger = RequestLogger(max_log_len=args.max_log_len)
+else:
+request_logger = None
base_model_paths = [
BaseModelPath(name=name, model_path=args.model)

View File

@@ -1668,6 +1668,12 @@ class FlexibleArgumentParser(ArgumentParser):
# Enable the deprecated kwarg for Python 3.12 and below
def parse_known_args(self, args=None, namespace=None):
+if args is not None and "--disable-log-requests" in args:
+# Special case warning because the warning below won't trigger
+# if --disable-log-requests is passed, since its value equals the default.
+logger.warning_once(
+"argument '--disable-log-requests' is deprecated. This "
+"will be removed in v0.12.0.")
namespace, args = super().parse_known_args(args, namespace)
for action in FlexibleArgumentParser._deprecated:
if (hasattr(namespace, dest := action.dest)
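
The special case exists because the generic deprecation loop below it inspects parsed values, and with BooleanOptionalAction the deprecated flag's default is already True: passing --disable-log-requests produces a namespace identical to not passing it, so the raw argv has to be scanned before parsing. A self-contained illustration of the ambiguity, using plain argparse outside vLLM:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--disable-log-requests",
                        action=argparse.BooleanOptionalAction,
                        default=True)

    with_flag = parser.parse_args(["--disable-log-requests"])
    without_flag = parser.parse_args([])

    # Both parses yield the same value, so usage of the deprecated flag
    # cannot be detected from the namespace alone:
    assert with_flag.disable_log_requests == without_flag.disable_log_requests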

View File

@@ -27,7 +27,7 @@ from vllm.transformers_utils.config import (
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, cdiv
+from vllm.utils import Device, cdiv, deprecate_kwargs
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
@@ -142,16 +142,22 @@ class AsyncLLM(EngineClient):
pass
@classmethod
+@deprecate_kwargs(
+"disable_log_requests",
+additional_message=("This argument will have no effect. "
+"Use `enable_log_requests` instead."),
+)
def from_vllm_config(
-cls,
-vllm_config: VllmConfig,
-start_engine_loop: bool = True,
-usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
-stat_loggers: Optional[list[StatLoggerFactory]] = None,
-disable_log_requests: bool = False,
-disable_log_stats: bool = False,
-client_addresses: Optional[dict[str, str]] = None,
-client_index: int = 0,
+cls,
+vllm_config: VllmConfig,
+start_engine_loop: bool = True,
+usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
+stat_loggers: Optional[list[StatLoggerFactory]] = None,
+enable_log_requests: bool = False,
+disable_log_stats: bool = False,
+client_addresses: Optional[dict[str, str]] = None,
+client_index: int = 0,
+disable_log_requests: bool = True, # Deprecated, will be removed
) -> "AsyncLLM":
if not envs.VLLM_USE_V1:
raise ValueError(
@@ -166,7 +172,7 @@
executor_class=Executor.get_class(vllm_config),
start_engine_loop=start_engine_loop,
stat_loggers=stat_loggers,
-log_requests=not disable_log_requests,
+log_requests=enable_log_requests,
log_stats=not disable_log_stats,
usage_context=usage_context,
client_addresses=client_addresses,
@@ -191,7 +197,7 @@
return cls(
vllm_config=vllm_config,
executor_class=executor_class,
-log_requests=not engine_args.disable_log_requests,
+log_requests=engine_args.enable_log_requests,
log_stats=not engine_args.disable_log_stats,
start_engine_loop=start_engine_loop,
usage_context=usage_context,