mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-12 13:25:50 +08:00
Deprecate --disable-log-requests and replace with --enable-log-requests (#21739)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
97608dc276
commit
2d7b09b998
@ -104,7 +104,6 @@ We test the throughput by using `vllm bench serve` with request rate = inf to co
|
|||||||
"tensor_parallel_size": 1,
|
"tensor_parallel_size": 1,
|
||||||
"swap_space": 16,
|
"swap_space": 16,
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"load_format": "dummy"
|
"load_format": "dummy"
|
||||||
},
|
},
|
||||||
"client_parameters": {
|
"client_parameters": {
|
||||||
|
|||||||
@ -11,7 +11,6 @@
|
|||||||
},
|
},
|
||||||
"vllm_server_parameters": {
|
"vllm_server_parameters": {
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"gpu_memory_utilization": 0.9,
|
"gpu_memory_utilization": 0.9,
|
||||||
"num_scheduler_steps": 10,
|
"num_scheduler_steps": 10,
|
||||||
"max_num_seqs": 512,
|
"max_num_seqs": 512,
|
||||||
|
|||||||
@ -35,7 +35,6 @@
|
|||||||
},
|
},
|
||||||
"vllm_server_parameters": {
|
"vllm_server_parameters": {
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"gpu_memory_utilization": 0.9,
|
"gpu_memory_utilization": 0.9,
|
||||||
"num_scheduler_steps": 10,
|
"num_scheduler_steps": 10,
|
||||||
"max_num_seqs": 512,
|
"max_num_seqs": 512,
|
||||||
@ -90,7 +89,6 @@
|
|||||||
},
|
},
|
||||||
"vllm_server_parameters": {
|
"vllm_server_parameters": {
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"gpu_memory_utilization": 0.9,
|
"gpu_memory_utilization": 0.9,
|
||||||
"num_scheduler_steps": 10,
|
"num_scheduler_steps": 10,
|
||||||
"max_num_seqs": 512,
|
"max_num_seqs": 512,
|
||||||
@ -145,7 +143,6 @@
|
|||||||
},
|
},
|
||||||
"vllm_server_parameters": {
|
"vllm_server_parameters": {
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"gpu_memory_utilization": 0.9,
|
"gpu_memory_utilization": 0.9,
|
||||||
"num_scheduler_steps": 10,
|
"num_scheduler_steps": 10,
|
||||||
"max_num_seqs": 512,
|
"max_num_seqs": 512,
|
||||||
@ -197,7 +194,6 @@
|
|||||||
},
|
},
|
||||||
"vllm_server_parameters": {
|
"vllm_server_parameters": {
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"gpu_memory_utilization": 0.9,
|
"gpu_memory_utilization": 0.9,
|
||||||
"num_scheduler_steps": 10,
|
"num_scheduler_steps": 10,
|
||||||
"max_num_seqs": 512,
|
"max_num_seqs": 512,
|
||||||
@ -251,7 +247,6 @@
|
|||||||
},
|
},
|
||||||
"vllm_server_parameters": {
|
"vllm_server_parameters": {
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"gpu_memory_utilization": 0.9,
|
"gpu_memory_utilization": 0.9,
|
||||||
"num_scheduler_steps": 10,
|
"num_scheduler_steps": 10,
|
||||||
"max_num_seqs": 512,
|
"max_num_seqs": 512,
|
||||||
@ -305,7 +300,6 @@
|
|||||||
},
|
},
|
||||||
"vllm_server_parameters": {
|
"vllm_server_parameters": {
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"gpu_memory_utilization": 0.9,
|
"gpu_memory_utilization": 0.9,
|
||||||
"num_scheduler_steps": 10,
|
"num_scheduler_steps": 10,
|
||||||
"max_num_seqs": 512,
|
"max_num_seqs": 512,
|
||||||
|
|||||||
@ -17,7 +17,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -50,7 +49,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -83,7 +81,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -117,7 +114,6 @@
|
|||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"enable_chunked_prefill": "",
|
"enable_chunked_prefill": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -153,7 +149,6 @@
|
|||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"enable_chunked_prefill": "",
|
"enable_chunked_prefill": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -189,7 +184,6 @@
|
|||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"enable_chunked_prefill": "",
|
"enable_chunked_prefill": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
|
|||||||
@ -17,7 +17,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -50,7 +49,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -84,7 +82,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -118,7 +115,6 @@
|
|||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"enable_chunked_prefill": "",
|
"enable_chunked_prefill": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -154,7 +150,6 @@
|
|||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"enable_chunked_prefill": "",
|
"enable_chunked_prefill": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -191,7 +186,6 @@
|
|||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"enable_chunked_prefill": "",
|
"enable_chunked_prefill": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
|
|||||||
@ -17,7 +17,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -50,7 +49,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -83,7 +81,6 @@
|
|||||||
"block_size": 128,
|
"block_size": 128,
|
||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -117,7 +114,6 @@
|
|||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"enable_chunked_prefill": "",
|
"enable_chunked_prefill": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
@ -153,7 +149,6 @@
|
|||||||
"trust_remote_code": "",
|
"trust_remote_code": "",
|
||||||
"enable_chunked_prefill": "",
|
"enable_chunked_prefill": "",
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"enforce_eager": "",
|
"enforce_eager": "",
|
||||||
"max_num_batched_tokens": 2048,
|
"max_num_batched_tokens": 2048,
|
||||||
"max_num_seqs": 256,
|
"max_num_seqs": 256,
|
||||||
|
|||||||
@ -7,7 +7,6 @@
|
|||||||
"tensor_parallel_size": 1,
|
"tensor_parallel_size": 1,
|
||||||
"swap_space": 16,
|
"swap_space": 16,
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"load_format": "dummy"
|
"load_format": "dummy"
|
||||||
},
|
},
|
||||||
"client_parameters": {
|
"client_parameters": {
|
||||||
@ -26,7 +25,6 @@
|
|||||||
"tensor_parallel_size": 4,
|
"tensor_parallel_size": 4,
|
||||||
"swap_space": 16,
|
"swap_space": 16,
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"load_format": "dummy"
|
"load_format": "dummy"
|
||||||
},
|
},
|
||||||
"client_parameters": {
|
"client_parameters": {
|
||||||
@ -45,7 +43,6 @@
|
|||||||
"tensor_parallel_size": 2,
|
"tensor_parallel_size": 2,
|
||||||
"swap_space": 16,
|
"swap_space": 16,
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
"disable_log_requests": "",
|
|
||||||
"load_format": "dummy"
|
"load_format": "dummy"
|
||||||
},
|
},
|
||||||
"client_parameters": {
|
"client_parameters": {
|
||||||
@ -60,8 +57,7 @@
|
|||||||
"test_name": "serving_llama70B_tp4_sharegpt_specdecode",
|
"test_name": "serving_llama70B_tp4_sharegpt_specdecode",
|
||||||
"qps_list": [2],
|
"qps_list": [2],
|
||||||
"server_parameters": {
|
"server_parameters": {
|
||||||
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
||||||
"disable_log_requests": "",
|
|
||||||
"tensor_parallel_size": 4,
|
"tensor_parallel_size": 4,
|
||||||
"swap_space": 16,
|
"swap_space": 16,
|
||||||
"speculative_config": {
|
"speculative_config": {
|
||||||
|
|||||||
@ -28,7 +28,6 @@ def test_mp_reducer(monkeypatch):
|
|||||||
max_model_len=32,
|
max_model_len=32,
|
||||||
gpu_memory_utilization=0.1,
|
gpu_memory_utilization=0.1,
|
||||||
disable_log_stats=True,
|
disable_log_stats=True,
|
||||||
disable_log_requests=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
async_llm = AsyncLLM.from_engine_args(
|
async_llm = AsyncLLM.from_engine_args(
|
||||||
|
|||||||
@ -16,7 +16,7 @@ NUM_EXPECTED_TOKENS = 10
|
|||||||
NUM_REQUESTS = 10000
|
NUM_REQUESTS = 10000
|
||||||
|
|
||||||
# Scenarios to test for num generated token.
|
# Scenarios to test for num generated token.
|
||||||
ENGINE_ARGS = AsyncEngineArgs(model=MODEL, disable_log_requests=True)
|
ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
|
|||||||
@ -26,12 +26,10 @@ if not current_platform.is_cuda():
|
|||||||
TEXT_ENGINE_ARGS = AsyncEngineArgs(
|
TEXT_ENGINE_ARGS = AsyncEngineArgs(
|
||||||
model="meta-llama/Llama-3.2-1B-Instruct",
|
model="meta-llama/Llama-3.2-1B-Instruct",
|
||||||
enforce_eager=True,
|
enforce_eager=True,
|
||||||
disable_log_requests=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
VISION_ENGINE_ARGS = AsyncEngineArgs(model="Qwen/Qwen2-VL-2B-Instruct",
|
VISION_ENGINE_ARGS = AsyncEngineArgs(model="Qwen/Qwen2-VL-2B-Instruct",
|
||||||
enforce_eager=True,
|
enforce_eager=True)
|
||||||
disable_log_requests=True)
|
|
||||||
|
|
||||||
TEXT_PROMPT = "Hello my name is Robert and"
|
TEXT_PROMPT = "Hello my name is Robert and"
|
||||||
|
|
||||||
|
|||||||
@ -25,7 +25,6 @@ DP_SIZE = int(os.getenv("DP_SIZE", 2))
|
|||||||
engine_args = AsyncEngineArgs(
|
engine_args = AsyncEngineArgs(
|
||||||
model="ibm-research/PowerMoE-3b",
|
model="ibm-research/PowerMoE-3b",
|
||||||
enforce_eager=True,
|
enforce_eager=True,
|
||||||
disable_log_requests=True,
|
|
||||||
tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
|
tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
|
||||||
data_parallel_size=DP_SIZE,
|
data_parallel_size=DP_SIZE,
|
||||||
)
|
)
|
||||||
|
|||||||
@ -18,7 +18,7 @@ from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
|
|||||||
import regex as re
|
import regex as re
|
||||||
import torch
|
import torch
|
||||||
from pydantic import TypeAdapter, ValidationError
|
from pydantic import TypeAdapter, ValidationError
|
||||||
from typing_extensions import TypeIs
|
from typing_extensions import TypeIs, deprecated
|
||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
|
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
|
||||||
@ -1704,7 +1704,23 @@ class EngineArgs:
|
|||||||
@dataclass
|
@dataclass
|
||||||
class AsyncEngineArgs(EngineArgs):
|
class AsyncEngineArgs(EngineArgs):
|
||||||
"""Arguments for asynchronous vLLM engine."""
|
"""Arguments for asynchronous vLLM engine."""
|
||||||
disable_log_requests: bool = False
|
enable_log_requests: bool = False
|
||||||
|
|
||||||
|
@property
|
||||||
|
@deprecated(
|
||||||
|
"`disable_log_requests` is deprecated and has been replaced with "
|
||||||
|
"`enable_log_requests`. This will be removed in v0.12.0. Please use "
|
||||||
|
"`enable_log_requests` instead.")
|
||||||
|
def disable_log_requests(self) -> bool:
|
||||||
|
return not self.enable_log_requests
|
||||||
|
|
||||||
|
@disable_log_requests.setter
|
||||||
|
@deprecated(
|
||||||
|
"`disable_log_requests` is deprecated and has been replaced with "
|
||||||
|
"`enable_log_requests`. This will be removed in v0.12.0. Please use "
|
||||||
|
"`enable_log_requests` instead.")
|
||||||
|
def disable_log_requests(self, value: bool):
|
||||||
|
self.enable_log_requests = not value
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_cli_args(parser: FlexibleArgumentParser,
|
def add_cli_args(parser: FlexibleArgumentParser,
|
||||||
@ -1715,9 +1731,15 @@ class AsyncEngineArgs(EngineArgs):
|
|||||||
load_general_plugins()
|
load_general_plugins()
|
||||||
if not async_args_only:
|
if not async_args_only:
|
||||||
parser = EngineArgs.add_cli_args(parser)
|
parser = EngineArgs.add_cli_args(parser)
|
||||||
|
parser.add_argument('--enable-log-requests',
|
||||||
|
action=argparse.BooleanOptionalAction,
|
||||||
|
default=AsyncEngineArgs.enable_log_requests,
|
||||||
|
help='Enable logging requests.')
|
||||||
parser.add_argument('--disable-log-requests',
|
parser.add_argument('--disable-log-requests',
|
||||||
action='store_true',
|
action=argparse.BooleanOptionalAction,
|
||||||
help='Disable logging requests.')
|
default=not AsyncEngineArgs.enable_log_requests,
|
||||||
|
help='[DEPRECATED] Disable logging requests.',
|
||||||
|
deprecated=True)
|
||||||
current_platform.pre_register_and_update(parser)
|
current_platform.pre_register_and_update(parser)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|||||||
@ -30,7 +30,7 @@ from vllm.sampling_params import SamplingParams
|
|||||||
from vllm.sequence import ExecuteModelRequest
|
from vllm.sequence import ExecuteModelRequest
|
||||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import Device, weak_bind
|
from vllm.utils import Device, deprecate_kwargs, weak_bind
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
|
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
|
||||||
@ -554,14 +554,20 @@ class AsyncLLMEngine(EngineClient):
|
|||||||
return LLMEngine._get_executor_cls(engine_config)
|
return LLMEngine._get_executor_cls(engine_config)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@deprecate_kwargs(
|
||||||
|
"disable_log_requests",
|
||||||
|
additional_message=("This argument will have no effect. "
|
||||||
|
"Use `enable_log_requests` instead."),
|
||||||
|
)
|
||||||
def from_vllm_config(
|
def from_vllm_config(
|
||||||
cls,
|
cls,
|
||||||
vllm_config: VllmConfig,
|
vllm_config: VllmConfig,
|
||||||
start_engine_loop: bool = True,
|
start_engine_loop: bool = True,
|
||||||
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
||||||
stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
|
stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
|
||||||
disable_log_requests: bool = False,
|
enable_log_requests: bool = False,
|
||||||
disable_log_stats: bool = False,
|
disable_log_stats: bool = False,
|
||||||
|
disable_log_requests: bool = True, # Deprecated, will be removed
|
||||||
) -> "AsyncLLMEngine":
|
) -> "AsyncLLMEngine":
|
||||||
"""Create an AsyncLLMEngine from the EngineArgs."""
|
"""Create an AsyncLLMEngine from the EngineArgs."""
|
||||||
|
|
||||||
@ -569,7 +575,7 @@ class AsyncLLMEngine(EngineClient):
|
|||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
executor_class=cls._get_executor_cls(vllm_config),
|
executor_class=cls._get_executor_cls(vllm_config),
|
||||||
start_engine_loop=start_engine_loop,
|
start_engine_loop=start_engine_loop,
|
||||||
log_requests=not disable_log_requests,
|
log_requests=enable_log_requests,
|
||||||
log_stats=not disable_log_stats,
|
log_stats=not disable_log_stats,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
stat_loggers=stat_loggers,
|
stat_loggers=stat_loggers,
|
||||||
@ -598,7 +604,7 @@ class AsyncLLMEngine(EngineClient):
|
|||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
stat_loggers=stat_loggers,
|
stat_loggers=stat_loggers,
|
||||||
disable_log_stats=engine_args.disable_log_stats,
|
disable_log_stats=engine_args.disable_log_stats,
|
||||||
disable_log_requests=engine_args.disable_log_requests,
|
enable_log_requests=engine_args.enable_log_requests,
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@ -34,6 +34,7 @@ from vllm.outputs import RequestOutput
|
|||||||
from vllm.transformers_utils.config import (
|
from vllm.transformers_utils.config import (
|
||||||
maybe_register_config_serialize_by_value)
|
maybe_register_config_serialize_by_value)
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
|
from vllm.utils import deprecate_kwargs
|
||||||
from vllm.worker.model_runner_base import InputProcessingError
|
from vllm.worker.model_runner_base import InputProcessingError
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@ -120,10 +121,20 @@ class MQLLMEngine:
|
|||||||
return ENGINE_DEAD_ERROR()
|
return ENGINE_DEAD_ERROR()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_vllm_config(cls, vllm_config: VllmConfig,
|
@deprecate_kwargs(
|
||||||
usage_context: UsageContext,
|
"disable_log_requests",
|
||||||
disable_log_requests: bool, disable_log_stats: bool,
|
additional_message=("This argument will have no effect. "
|
||||||
ipc_path: str) -> "MQLLMEngine":
|
"Use `enable_log_requests` instead."),
|
||||||
|
)
|
||||||
|
def from_vllm_config(
|
||||||
|
cls,
|
||||||
|
vllm_config: VllmConfig,
|
||||||
|
usage_context: UsageContext,
|
||||||
|
enable_log_requests: bool,
|
||||||
|
disable_log_stats: bool,
|
||||||
|
ipc_path: str,
|
||||||
|
disable_log_requests: bool = True, # Deprecated, will be removed
|
||||||
|
) -> "MQLLMEngine":
|
||||||
# Setup plugins for each process
|
# Setup plugins for each process
|
||||||
from vllm.plugins import load_general_plugins
|
from vllm.plugins import load_general_plugins
|
||||||
load_general_plugins()
|
load_general_plugins()
|
||||||
@ -136,7 +147,7 @@ class MQLLMEngine:
|
|||||||
ipc_path=ipc_path,
|
ipc_path=ipc_path,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
use_async_sockets=use_async_sockets,
|
use_async_sockets=use_async_sockets,
|
||||||
log_requests=(not disable_log_requests),
|
log_requests=enable_log_requests,
|
||||||
log_stats=(not disable_log_stats),
|
log_stats=(not disable_log_stats),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -150,7 +161,7 @@ class MQLLMEngine:
|
|||||||
ipc_path=ipc_path,
|
ipc_path=ipc_path,
|
||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
disable_log_requests=engine_args.disable_log_requests,
|
enable_log_requests=engine_args.enable_log_requests,
|
||||||
disable_log_stats=engine_args.disable_log_stats,
|
disable_log_stats=engine_args.disable_log_stats,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -436,7 +447,7 @@ def signal_handler(*_) -> None:
|
|||||||
|
|
||||||
def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
|
def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
|
||||||
ipc_path: str, disable_log_stats: bool,
|
ipc_path: str, disable_log_stats: bool,
|
||||||
disable_log_requests: bool, engine_alive):
|
enable_log_requests: bool, engine_alive):
|
||||||
try:
|
try:
|
||||||
# Ensure we can serialize transformer config before spawning
|
# Ensure we can serialize transformer config before spawning
|
||||||
maybe_register_config_serialize_by_value()
|
maybe_register_config_serialize_by_value()
|
||||||
@ -445,7 +456,7 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
|
|||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
disable_log_stats=disable_log_stats,
|
disable_log_stats=disable_log_stats,
|
||||||
disable_log_requests=disable_log_requests,
|
enable_log_requests=enable_log_requests,
|
||||||
ipc_path=ipc_path)
|
ipc_path=ipc_path)
|
||||||
|
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
|
|||||||
@ -205,7 +205,7 @@ async def build_async_engine_client_from_engine_args(
|
|||||||
async_llm = AsyncLLM.from_vllm_config(
|
async_llm = AsyncLLM.from_vllm_config(
|
||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
disable_log_requests=engine_args.disable_log_requests,
|
enable_log_requests=engine_args.enable_log_requests,
|
||||||
disable_log_stats=engine_args.disable_log_stats,
|
disable_log_stats=engine_args.disable_log_stats,
|
||||||
client_addresses=client_config,
|
client_addresses=client_config,
|
||||||
client_index=client_index)
|
client_index=client_index)
|
||||||
@ -227,7 +227,7 @@ async def build_async_engine_client_from_engine_args(
|
|||||||
engine_client = AsyncLLMEngine.from_vllm_config(
|
engine_client = AsyncLLMEngine.from_vllm_config(
|
||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
disable_log_requests=engine_args.disable_log_requests,
|
enable_log_requests=engine_args.enable_log_requests,
|
||||||
disable_log_stats=engine_args.disable_log_stats)
|
disable_log_stats=engine_args.disable_log_stats)
|
||||||
yield engine_client
|
yield engine_client
|
||||||
finally:
|
finally:
|
||||||
@ -272,7 +272,7 @@ async def build_async_engine_client_from_engine_args(
|
|||||||
target=run_mp_engine,
|
target=run_mp_engine,
|
||||||
args=(vllm_config, UsageContext.OPENAI_API_SERVER, ipc_path,
|
args=(vllm_config, UsageContext.OPENAI_API_SERVER, ipc_path,
|
||||||
engine_args.disable_log_stats,
|
engine_args.disable_log_stats,
|
||||||
engine_args.disable_log_requests, engine_alive))
|
engine_args.enable_log_requests, engine_alive))
|
||||||
engine_process.start()
|
engine_process.start()
|
||||||
engine_pid = engine_process.pid
|
engine_pid = engine_process.pid
|
||||||
assert engine_pid is not None, "Engine process failed to start."
|
assert engine_pid is not None, "Engine process failed to start."
|
||||||
@ -1570,10 +1570,10 @@ async def init_app_state(
|
|||||||
else:
|
else:
|
||||||
served_model_names = [args.model]
|
served_model_names = [args.model]
|
||||||
|
|
||||||
if args.disable_log_requests:
|
if args.enable_log_requests:
|
||||||
request_logger = None
|
|
||||||
else:
|
|
||||||
request_logger = RequestLogger(max_log_len=args.max_log_len)
|
request_logger = RequestLogger(max_log_len=args.max_log_len)
|
||||||
|
else:
|
||||||
|
request_logger = None
|
||||||
|
|
||||||
base_model_paths = [
|
base_model_paths = [
|
||||||
BaseModelPath(name=name, model_path=args.model)
|
BaseModelPath(name=name, model_path=args.model)
|
||||||
|
|||||||
@ -324,10 +324,10 @@ async def run_batch(
|
|||||||
else:
|
else:
|
||||||
served_model_names = [args.model]
|
served_model_names = [args.model]
|
||||||
|
|
||||||
if args.disable_log_requests:
|
if args.enable_log_requests:
|
||||||
request_logger = None
|
|
||||||
else:
|
|
||||||
request_logger = RequestLogger(max_log_len=args.max_log_len)
|
request_logger = RequestLogger(max_log_len=args.max_log_len)
|
||||||
|
else:
|
||||||
|
request_logger = None
|
||||||
|
|
||||||
base_model_paths = [
|
base_model_paths = [
|
||||||
BaseModelPath(name=name, model_path=args.model)
|
BaseModelPath(name=name, model_path=args.model)
|
||||||
|
|||||||
@ -1668,6 +1668,12 @@ class FlexibleArgumentParser(ArgumentParser):
|
|||||||
# Enable the deprecated kwarg for Python 3.12 and below
|
# Enable the deprecated kwarg for Python 3.12 and below
|
||||||
|
|
||||||
def parse_known_args(self, args=None, namespace=None):
|
def parse_known_args(self, args=None, namespace=None):
|
||||||
|
if args is not None and "--disable-log-requests" in args:
|
||||||
|
# Special case warning because the warning below won't trigger
|
||||||
|
# for --disable-log-requests, since its value equals the default.
|
||||||
|
logger.warning_once(
|
||||||
|
"argument '--disable-log-requests' is deprecated. This "
|
||||||
|
"will be removed in v0.12.0.")
|
||||||
namespace, args = super().parse_known_args(args, namespace)
|
namespace, args = super().parse_known_args(args, namespace)
|
||||||
for action in FlexibleArgumentParser._deprecated:
|
for action in FlexibleArgumentParser._deprecated:
|
||||||
if (hasattr(namespace, dest := action.dest)
|
if (hasattr(namespace, dest := action.dest)
|
||||||
|
|||||||
@ -27,7 +27,7 @@ from vllm.transformers_utils.config import (
|
|||||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||||
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
|
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import Device, cdiv
|
from vllm.utils import Device, cdiv, deprecate_kwargs
|
||||||
from vllm.v1.engine import EngineCoreRequest
|
from vllm.v1.engine import EngineCoreRequest
|
||||||
from vllm.v1.engine.core_client import EngineCoreClient
|
from vllm.v1.engine.core_client import EngineCoreClient
|
||||||
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
|
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
|
||||||
@ -142,16 +142,22 @@ class AsyncLLM(EngineClient):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@deprecate_kwargs(
|
||||||
|
"disable_log_requests",
|
||||||
|
additional_message=("This argument will have no effect. "
|
||||||
|
"Use `enable_log_requests` instead."),
|
||||||
|
)
|
||||||
def from_vllm_config(
|
def from_vllm_config(
|
||||||
cls,
|
cls,
|
||||||
vllm_config: VllmConfig,
|
vllm_config: VllmConfig,
|
||||||
start_engine_loop: bool = True,
|
start_engine_loop: bool = True,
|
||||||
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
||||||
stat_loggers: Optional[list[StatLoggerFactory]] = None,
|
stat_loggers: Optional[list[StatLoggerFactory]] = None,
|
||||||
disable_log_requests: bool = False,
|
enable_log_requests: bool = False,
|
||||||
disable_log_stats: bool = False,
|
disable_log_stats: bool = False,
|
||||||
client_addresses: Optional[dict[str, str]] = None,
|
client_addresses: Optional[dict[str, str]] = None,
|
||||||
client_index: int = 0,
|
client_index: int = 0,
|
||||||
|
disable_log_requests: bool = True, # Deprecated, will be removed
|
||||||
) -> "AsyncLLM":
|
) -> "AsyncLLM":
|
||||||
if not envs.VLLM_USE_V1:
|
if not envs.VLLM_USE_V1:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@ -166,7 +172,7 @@ class AsyncLLM(EngineClient):
|
|||||||
executor_class=Executor.get_class(vllm_config),
|
executor_class=Executor.get_class(vllm_config),
|
||||||
start_engine_loop=start_engine_loop,
|
start_engine_loop=start_engine_loop,
|
||||||
stat_loggers=stat_loggers,
|
stat_loggers=stat_loggers,
|
||||||
log_requests=not disable_log_requests,
|
log_requests=enable_log_requests,
|
||||||
log_stats=not disable_log_stats,
|
log_stats=not disable_log_stats,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
client_addresses=client_addresses,
|
client_addresses=client_addresses,
|
||||||
@ -191,7 +197,7 @@ class AsyncLLM(EngineClient):
|
|||||||
return cls(
|
return cls(
|
||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
executor_class=executor_class,
|
executor_class=executor_class,
|
||||||
log_requests=not engine_args.disable_log_requests,
|
log_requests=engine_args.enable_log_requests,
|
||||||
log_stats=not engine_args.disable_log_stats,
|
log_stats=not engine_args.disable_log_stats,
|
||||||
start_engine_loop=start_engine_loop,
|
start_engine_loop=start_engine_loop,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user