From f5d72b2fc6771de19c351945f1fbbb0198d53b8e Mon Sep 17 00:00:00 2001 From: sroy745 <142070531+sroy745@users.noreply.github.com> Date: Thu, 3 Oct 2024 09:44:21 -0700 Subject: [PATCH] [Core] Make BlockSpaceManagerV2 the default BlockManager to use. (#8678) --- vllm/config.py | 2 +- vllm/engine/arg_utils.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 1310c07ade482..05d5f4998d74d 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -970,7 +970,7 @@ class SchedulerConfig: max_num_batched_tokens: Optional[int], max_num_seqs: int, max_model_len: int, - use_v2_block_manager: bool = False, + use_v2_block_manager: bool = True, num_lookahead_slots: int = 0, delay_factor: float = 0.0, enable_chunked_prefill: bool = False, diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index c97b6ffb093f7..097fe7c02444c 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -107,7 +107,7 @@ class EngineArgs: block_size: int = 16 enable_prefix_caching: bool = False disable_sliding_window: bool = False - use_v2_block_manager: bool = False + use_v2_block_manager: bool = True swap_space: float = 4 # GiB cpu_offload_gb: float = 0 # GiB gpu_memory_utilization: float = 0.90 @@ -369,9 +369,12 @@ class EngineArgs: action='store_true', help='Disables sliding window, ' 'capping to sliding window size') - parser.add_argument('--use-v2-block-manager', - action='store_true', - help='Use BlockSpaceMangerV2.') + parser.add_argument( + '--use-v2-block-manager', + default=EngineArgs.use_v2_block_manager, + action='store_true', + help='Use BlockSpaceMangerV2. By default this is set to True. ' + 'Set to False to use BlockSpaceManagerV1') parser.add_argument( '--num-lookahead-slots', type=int,