commit 2d7b09b998 (parent 97608dc276)

Deprecate --disable-log-requests and replace with --enable-log-requests (#21739)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
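In practice this makes request logging opt-in: launch commands that passed --disable-log-requests can simply drop the flag, while deployments that want per-request logs should now pass --enable-log-requests. The old flag still parses but emits a deprecation warning, and removal is scheduled for v0.12.0. The diff below updates the benchmark configs, tests, and engine entrypoints accordingly.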
@@ -104,7 +104,6 @@ We test the throughput by using `vllm bench serve` with request rate = inf to co
             "tensor_parallel_size": 1,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {

@@ -11,7 +11,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -35,7 +35,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -90,7 +89,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -145,7 +143,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -197,7 +194,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -251,7 +247,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -305,7 +300,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,

@@ -17,7 +17,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -50,7 +49,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -83,7 +81,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -117,7 +114,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -153,7 +149,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -189,7 +184,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,

@@ -17,7 +17,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -50,7 +49,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -84,7 +82,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -118,7 +115,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -154,7 +150,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -191,7 +186,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,

@@ -17,7 +17,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -50,7 +49,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -83,7 +81,6 @@
             "block_size": 128,
             "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -117,7 +114,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,
@@ -153,7 +149,6 @@
             "trust_remote_code": "",
             "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "enforce_eager": "",
             "max_num_batched_tokens": 2048,
             "max_num_seqs": 256,

@@ -7,7 +7,6 @@
             "tensor_parallel_size": 1,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -26,7 +25,6 @@
             "tensor_parallel_size": 4,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -45,7 +43,6 @@
             "tensor_parallel_size": 2,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -60,8 +57,7 @@
         "test_name": "serving_llama70B_tp4_sharegpt_specdecode",
         "qps_list": [2],
         "server_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-            "disable_log_requests": "",
+            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
             "tensor_parallel_size": 4,
             "swap_space": 16,
             "speculative_config": {

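Note that the benchmark configs above only delete the "disable_log_requests": "" entries; since request logging is now off unless --enable-log-requests is passed, no replacement key is needed.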
@@ -28,7 +28,6 @@ def test_mp_reducer(monkeypatch):
         max_model_len=32,
         gpu_memory_utilization=0.1,
         disable_log_stats=True,
-        disable_log_requests=True,
     )

     async_llm = AsyncLLM.from_engine_args(

@@ -16,7 +16,7 @@ NUM_EXPECTED_TOKENS = 10
 NUM_REQUESTS = 10000

 # Scenarios to test for num generated token.
-ENGINE_ARGS = AsyncEngineArgs(model=MODEL, disable_log_requests=True)
+ENGINE_ARGS = AsyncEngineArgs(model=MODEL)


 @pytest.fixture(scope="function")

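For tests like the one above, a minimal sketch of the updated construction (model name taken from the neighboring test file; enable_log_requests defaults to False and is spelled out only for illustration):

from vllm.engine.arg_utils import AsyncEngineArgs

# Request logging is now opt-in, so the old `disable_log_requests=True`
# kwargs simply disappear; set the new flag to True to get logs back.
ENGINE_ARGS = AsyncEngineArgs(
    model="meta-llama/Llama-3.2-1B-Instruct",
    enable_log_requests=False,  # the new default
)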
@@ -26,12 +26,10 @@ if not current_platform.is_cuda():
 TEXT_ENGINE_ARGS = AsyncEngineArgs(
     model="meta-llama/Llama-3.2-1B-Instruct",
     enforce_eager=True,
-    disable_log_requests=True,
 )

 VISION_ENGINE_ARGS = AsyncEngineArgs(model="Qwen/Qwen2-VL-2B-Instruct",
-                                     enforce_eager=True,
-                                     disable_log_requests=True)
+                                     enforce_eager=True)

 TEXT_PROMPT = "Hello my name is Robert and"

@@ -25,7 +25,6 @@ DP_SIZE = int(os.getenv("DP_SIZE", 2))
 engine_args = AsyncEngineArgs(
     model="ibm-research/PowerMoE-3b",
     enforce_eager=True,
-    disable_log_requests=True,
     tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
     data_parallel_size=DP_SIZE,
 )

@@ -18,7 +18,7 @@ from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
 import regex as re
 import torch
 from pydantic import TypeAdapter, ValidationError
-from typing_extensions import TypeIs
+from typing_extensions import TypeIs, deprecated

 import vllm.envs as envs
 from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
@@ -1704,7 +1704,23 @@ class EngineArgs:
 @dataclass
 class AsyncEngineArgs(EngineArgs):
     """Arguments for asynchronous vLLM engine."""
-    disable_log_requests: bool = False
+    enable_log_requests: bool = False
+
+    @property
+    @deprecated(
+        "`disable_log_requests` is deprecated and has been replaced with "
+        "`enable_log_requests`. This will be removed in v0.12.0. Please use "
+        "`enable_log_requests` instead.")
+    def disable_log_requests(self) -> bool:
+        return not self.enable_log_requests
+
+    @disable_log_requests.setter
+    @deprecated(
+        "`disable_log_requests` is deprecated and has been replaced with "
+        "`enable_log_requests`. This will be removed in v0.12.0. Please use "
+        "`enable_log_requests` instead.")
+    def disable_log_requests(self, value: bool):
+        self.enable_log_requests = not value

     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser,
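The hunk above keeps the old attribute working as a deprecated computed property over the new field. A self-contained sketch of the same pattern (standalone names, assuming typing_extensions >= 4.5 for the runtime deprecated decorator):

from dataclasses import dataclass
from typing_extensions import deprecated

@dataclass
class Args:
    enable_log_requests: bool = False

    @property
    @deprecated("`disable_log_requests` is deprecated; use `enable_log_requests`.")
    def disable_log_requests(self) -> bool:
        return not self.enable_log_requests

    @disable_log_requests.setter
    @deprecated("`disable_log_requests` is deprecated; use `enable_log_requests`.")
    def disable_log_requests(self, value: bool) -> None:
        self.enable_log_requests = not value

args = Args()
args.disable_log_requests = False  # emits DeprecationWarning, flips the new field
assert args.enable_log_requests is True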
@@ -1715,9 +1731,15 @@ class AsyncEngineArgs(EngineArgs):
         load_general_plugins()
         if not async_args_only:
             parser = EngineArgs.add_cli_args(parser)
+        parser.add_argument('--enable-log-requests',
+                            action=argparse.BooleanOptionalAction,
+                            default=AsyncEngineArgs.enable_log_requests,
+                            help='Enable logging requests.')
         parser.add_argument('--disable-log-requests',
-                            action='store_true',
-                            help='Disable logging requests.')
+                            action=argparse.BooleanOptionalAction,
+                            default=not AsyncEngineArgs.enable_log_requests,
+                            help='[DEPRECATED] Disable logging requests.',
+                            deprecated=True)
         current_platform.pre_register_and_update(parser)
         return parser

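The CLI wiring pairs the new flag with a deprecated alias. A standalone sketch of the same setup, assuming Python 3.13's argparse (which accepts deprecated=True; vLLM's FlexibleArgumentParser backports that kwarg to older interpreters, per the hunk further down):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--enable-log-requests',
                    action=argparse.BooleanOptionalAction,
                    default=False,
                    help='Enable logging requests.')
parser.add_argument('--disable-log-requests',
                    action=argparse.BooleanOptionalAction,
                    default=True,
                    help='[DEPRECATED] Disable logging requests.',
                    deprecated=True)

args = parser.parse_args(['--enable-log-requests'])
assert args.enable_log_requests  # BooleanOptionalAction also adds --no-* forms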
@@ -30,7 +30,7 @@ from vllm.sampling_params import SamplingParams
 from vllm.sequence import ExecuteModelRequest
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, weak_bind
+from vllm.utils import Device, deprecate_kwargs, weak_bind

 logger = init_logger(__name__)
 ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@@ -554,14 +554,20 @@ class AsyncLLMEngine(EngineClient):
         return LLMEngine._get_executor_cls(engine_config)

     @classmethod
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
     def from_vllm_config(
-            cls,
-            vllm_config: VllmConfig,
-            start_engine_loop: bool = True,
-            usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
-            stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
-            disable_log_requests: bool = False,
-            disable_log_stats: bool = False,
+        cls,
+        vllm_config: VllmConfig,
+        start_engine_loop: bool = True,
+        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
+        stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
+        enable_log_requests: bool = False,
+        disable_log_stats: bool = False,
+        disable_log_requests: bool = True,  # Deprecated, will be removed
     ) -> "AsyncLLMEngine":
         """Create an AsyncLLMEngine from the EngineArgs."""

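deprecate_kwargs lets callers keep passing the old keyword without effect while they migrate. A rough standalone sketch of what such a decorator does (the real helper lives in vllm.utils and may differ in detail):

import functools
import warnings

def deprecate_kwargs(*names, additional_message=""):
    def wrap(fn):
        @functools.wraps(fn)
        def inner(*args, **kwargs):
            # Warn once per call for every deprecated keyword actually passed.
            for name in names:
                if name in kwargs:
                    warnings.warn(
                        f"keyword argument '{name}' is deprecated. "
                        f"{additional_message}",
                        DeprecationWarning,
                        stacklevel=2)
            return fn(*args, **kwargs)
        return inner
    return wrap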
@@ -569,7 +575,7 @@ class AsyncLLMEngine(EngineClient):
             vllm_config=vllm_config,
             executor_class=cls._get_executor_cls(vllm_config),
             start_engine_loop=start_engine_loop,
-            log_requests=not disable_log_requests,
+            log_requests=enable_log_requests,
             log_stats=not disable_log_stats,
             usage_context=usage_context,
             stat_loggers=stat_loggers,
@@ -598,7 +604,7 @@ class AsyncLLMEngine(EngineClient):
             usage_context=usage_context,
             stat_loggers=stat_loggers,
             disable_log_stats=engine_args.disable_log_stats,
-            disable_log_requests=engine_args.disable_log_requests,
+            enable_log_requests=engine_args.enable_log_requests,
         )

     @property

@@ -34,6 +34,7 @@ from vllm.outputs import RequestOutput
 from vllm.transformers_utils.config import (
     maybe_register_config_serialize_by_value)
 from vllm.usage.usage_lib import UsageContext
+from vllm.utils import deprecate_kwargs
 from vllm.worker.model_runner_base import InputProcessingError

 logger = init_logger(__name__)
@@ -120,10 +121,20 @@ class MQLLMEngine:
         return ENGINE_DEAD_ERROR()

     @classmethod
-    def from_vllm_config(cls, vllm_config: VllmConfig,
-                         usage_context: UsageContext,
-                         disable_log_requests: bool, disable_log_stats: bool,
-                         ipc_path: str) -> "MQLLMEngine":
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
+    def from_vllm_config(
+        cls,
+        vllm_config: VllmConfig,
+        usage_context: UsageContext,
+        enable_log_requests: bool,
+        disable_log_stats: bool,
+        ipc_path: str,
+        disable_log_requests: bool = True,  # Deprecated, will be removed
+    ) -> "MQLLMEngine":
         # Setup plugins for each process
         from vllm.plugins import load_general_plugins
         load_general_plugins()
@@ -136,7 +147,7 @@ class MQLLMEngine:
             ipc_path=ipc_path,
             usage_context=usage_context,
             use_async_sockets=use_async_sockets,
-            log_requests=(not disable_log_requests),
+            log_requests=enable_log_requests,
             log_stats=(not disable_log_stats),
         )

@@ -150,7 +161,7 @@ class MQLLMEngine:
             ipc_path=ipc_path,
             vllm_config=vllm_config,
             usage_context=usage_context,
-            disable_log_requests=engine_args.disable_log_requests,
+            enable_log_requests=engine_args.enable_log_requests,
             disable_log_stats=engine_args.disable_log_stats,
         )

@@ -436,7 +447,7 @@ def signal_handler(*_) -> None:

 def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
                   ipc_path: str, disable_log_stats: bool,
-                  disable_log_requests: bool, engine_alive):
+                  enable_log_requests: bool, engine_alive):
     try:
         # Ensure we can serialize transformer config before spawning
         maybe_register_config_serialize_by_value()
@@ -445,7 +456,7 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
             vllm_config=vllm_config,
             usage_context=usage_context,
             disable_log_stats=disable_log_stats,
-            disable_log_requests=disable_log_requests,
+            enable_log_requests=enable_log_requests,
             ipc_path=ipc_path)

         signal.signal(signal.SIGTERM, signal_handler)

@@ -205,7 +205,7 @@ async def build_async_engine_client_from_engine_args(
         async_llm = AsyncLLM.from_vllm_config(
             vllm_config=vllm_config,
             usage_context=usage_context,
-            disable_log_requests=engine_args.disable_log_requests,
+            enable_log_requests=engine_args.enable_log_requests,
             disable_log_stats=engine_args.disable_log_stats,
             client_addresses=client_config,
             client_index=client_index)
@@ -227,7 +227,7 @@ async def build_async_engine_client_from_engine_args(
         engine_client = AsyncLLMEngine.from_vllm_config(
             vllm_config=vllm_config,
             usage_context=usage_context,
-            disable_log_requests=engine_args.disable_log_requests,
+            enable_log_requests=engine_args.enable_log_requests,
             disable_log_stats=engine_args.disable_log_stats)
         yield engine_client
     finally:
@@ -272,7 +272,7 @@ async def build_async_engine_client_from_engine_args(
             target=run_mp_engine,
             args=(vllm_config, UsageContext.OPENAI_API_SERVER, ipc_path,
                   engine_args.disable_log_stats,
-                  engine_args.disable_log_requests, engine_alive))
+                  engine_args.enable_log_requests, engine_alive))
         engine_process.start()
         engine_pid = engine_process.pid
         assert engine_pid is not None, "Engine process failed to start."
@@ -1570,10 +1570,10 @@ async def init_app_state(
     else:
         served_model_names = [args.model]

-    if args.disable_log_requests:
-        request_logger = None
-    else:
+    if args.enable_log_requests:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
+    else:
+        request_logger = None

     base_model_paths = [
         BaseModelPath(name=name, model_path=args.model)

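The init_app_state change is just an inverted toggle; condensed, the new logic reads as below (a sketch using the names from the hunk, assuming RequestLogger lives in vllm.entrypoints.logger):

from vllm.entrypoints.logger import RequestLogger

def build_request_logger(args):
    # A request logger is created only when logging is explicitly enabled.
    return (RequestLogger(max_log_len=args.max_log_len)
            if args.enable_log_requests else None)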
@@ -324,10 +324,10 @@ async def run_batch(
     else:
         served_model_names = [args.model]

-    if args.disable_log_requests:
-        request_logger = None
-    else:
+    if args.enable_log_requests:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
+    else:
+        request_logger = None

     base_model_paths = [
         BaseModelPath(name=name, model_path=args.model)

@@ -1668,6 +1668,12 @@ class FlexibleArgumentParser(ArgumentParser):
     # Enable the deprecated kwarg for Python 3.12 and below

     def parse_known_args(self, args=None, namespace=None):
+        if args is not None and "--disable-log-requests" in args:
+            # Special case warning because the warning below won't trigger
+            # if --disable-log-requests because its value is default.
+            logger.warning_once(
+                "argument '--disable-log-requests' is deprecated. This "
+                "will be removed in v0.12.0.")
         namespace, args = super().parse_known_args(args, namespace)
         for action in FlexibleArgumentParser._deprecated:
             if (hasattr(namespace, dest := action.dest)

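The early warning exists because the generic deprecation check keys off parsed values, and --disable-log-requests now parses to its default, so it would go unnoticed. A standalone sketch of the same guard (using warnings.warn in place of vLLM's logger.warning_once):

import argparse
import warnings

class Parser(argparse.ArgumentParser):
    def parse_known_args(self, args=None, namespace=None):
        # Warn eagerly: a value-based check would miss this flag because
        # passing it still yields the default parsed value.
        if args is not None and "--disable-log-requests" in args:
            warnings.warn(
                "argument '--disable-log-requests' is deprecated",
                DeprecationWarning,
                stacklevel=2)
        return super().parse_known_args(args, namespace)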
@@ -27,7 +27,7 @@ from vllm.transformers_utils.config import (
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, cdiv
+from vllm.utils import Device, cdiv, deprecate_kwargs
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core_client import EngineCoreClient
 from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
@@ -142,16 +142,22 @@ class AsyncLLM(EngineClient):
         pass

     @classmethod
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
     def from_vllm_config(
-            cls,
-            vllm_config: VllmConfig,
-            start_engine_loop: bool = True,
-            usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
-            stat_loggers: Optional[list[StatLoggerFactory]] = None,
-            disable_log_requests: bool = False,
-            disable_log_stats: bool = False,
-            client_addresses: Optional[dict[str, str]] = None,
-            client_index: int = 0,
+        cls,
+        vllm_config: VllmConfig,
+        start_engine_loop: bool = True,
+        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
+        stat_loggers: Optional[list[StatLoggerFactory]] = None,
+        enable_log_requests: bool = False,
+        disable_log_stats: bool = False,
+        client_addresses: Optional[dict[str, str]] = None,
+        client_index: int = 0,
+        disable_log_requests: bool = True,  # Deprecated, will be removed
     ) -> "AsyncLLM":
         if not envs.VLLM_USE_V1:
             raise ValueError(
@@ -166,7 +172,7 @@ class AsyncLLM(EngineClient):
             executor_class=Executor.get_class(vllm_config),
             start_engine_loop=start_engine_loop,
             stat_loggers=stat_loggers,
-            log_requests=not disable_log_requests,
+            log_requests=enable_log_requests,
             log_stats=not disable_log_stats,
             usage_context=usage_context,
             client_addresses=client_addresses,
@@ -191,7 +197,7 @@ class AsyncLLM(EngineClient):
         return cls(
             vllm_config=vllm_config,
             executor_class=executor_class,
-            log_requests=not engine_args.disable_log_requests,
+            log_requests=engine_args.enable_log_requests,
             log_stats=not engine_args.disable_log_stats,
             start_engine_loop=start_engine_loop,
             usage_context=usage_context,

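Putting it together, a minimal migration sketch for code that embeds the v1 engine (model name illustrative; AsyncLLM.from_engine_args appears in the test hunk near the top of this diff):

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.v1.engine.async_llm import AsyncLLM

engine_args = AsyncEngineArgs(
    model="meta-llama/Llama-3.2-1B-Instruct",
    enable_log_requests=True,  # was: disable_log_requests=False
)
engine = AsyncLLM.from_engine_args(engine_args)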