diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 55936cf0986a..10e6070b42c7 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -21,7 +21,7 @@ class EngineArgs: tensor_parallel_size: int = 1 block_size: int = 16 swap_space: int = 4 # GiB - gpu_memory_utilization: float = 0.95 + gpu_memory_utilization: float = 0.90 max_num_batched_tokens: int = 2560 max_num_seqs: int = 256 disable_log_stats: bool = False