mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 16:36:02 +08:00
Reduce GPU memory utilization to make sure OOM doesn't happen (#153)
This commit is contained in:
parent
bec7b2dc26
commit
bf5f121c02
@@ -21,7 +21,7 @@ class EngineArgs:
|
|||||||
tensor_parallel_size: int = 1
|
tensor_parallel_size: int = 1
|
||||||
block_size: int = 16
|
block_size: int = 16
|
||||||
swap_space: int = 4 # GiB
|
swap_space: int = 4 # GiB
|
||||||
gpu_memory_utilization: float = 0.95
|
gpu_memory_utilization: float = 0.90
|
||||||
max_num_batched_tokens: int = 2560
|
max_num_batched_tokens: int = 2560
|
||||||
max_num_seqs: int = 256
|
max_num_seqs: int = 256
|
||||||
disable_log_stats: bool = False
|
disable_log_stats: bool = False
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user