Reduce GPU memory utilization to make sure OOM doesn't happen (#153)

This commit is contained in:
Zhuohan Li 2023-06-18 17:33:50 +08:00 committed by GitHub
parent bec7b2dc26
commit bf5f121c02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -21,7 +21,7 @@ class EngineArgs:
tensor_parallel_size: int = 1
block_size: int = 16
swap_space: int = 4 # GiB
-gpu_memory_utilization: float = 0.95
+gpu_memory_utilization: float = 0.90
max_num_batched_tokens: int = 2560
max_num_seqs: int = 256
disable_log_stats: bool = False