mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 21:25:01 +08:00
Reduce GPU memory utilization to make sure OOM doesn't happen (#153)
This commit is contained in:
parent
bec7b2dc26
commit
bf5f121c02
@@ -21,7 +21,7 @@ class EngineArgs:
|
||||
tensor_parallel_size: int = 1
|
||||
block_size: int = 16
|
||||
swap_space: int = 4 # GiB
|
||||
-gpu_memory_utilization: float = 0.95
|
||||
+gpu_memory_utilization: float = 0.90
|
||||
max_num_batched_tokens: int = 2560
|
||||
max_num_seqs: int = 256
|
||||
disable_log_stats: bool = False
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user