mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 02:55:40 +08:00
[Bug] Fix the OOM condition for CPU cache (#260)
This commit is contained in:
parent
471a7a4566
commit
0b7db411b5
@ -128,7 +128,7 @@ class LLMEngine:
|
|||||||
logger.info(f'# GPU blocks: {num_gpu_blocks}, '
|
logger.info(f'# GPU blocks: {num_gpu_blocks}, '
|
||||||
f'# CPU blocks: {num_cpu_blocks}')
|
f'# CPU blocks: {num_cpu_blocks}')
|
||||||
|
|
||||||
if num_gpu_blocks <= 0 or num_cpu_blocks <= 0:
|
if num_gpu_blocks <= 0:
|
||||||
raise ValueError("No available memory for the cache blocks. "
|
raise ValueError("No available memory for the cache blocks. "
|
||||||
"Try increasing `gpu_memory_utilization` when "
|
"Try increasing `gpu_memory_utilization` when "
|
||||||
"initializing the engine.")
|
"initializing the engine.")
|
||||||
|
|||||||
@ -113,6 +113,8 @@ class Worker:
|
|||||||
num_gpu_blocks = int((total_gpu_memory * gpu_memory_utilization
|
num_gpu_blocks = int((total_gpu_memory * gpu_memory_utilization
|
||||||
- peak_memory) // cache_block_size)
|
- peak_memory) // cache_block_size)
|
||||||
num_cpu_blocks = int(cpu_swap_space // cache_block_size)
|
num_cpu_blocks = int(cpu_swap_space // cache_block_size)
|
||||||
|
num_gpu_blocks = max(num_gpu_blocks, 0)
|
||||||
|
num_cpu_blocks = max(num_cpu_blocks, 0)
|
||||||
torch.cuda.empty_cache()
|
torch.cuda.empty_cache()
|
||||||
|
|
||||||
# Reset the seed to ensure that the random state is not affected by
|
# Reset the seed to ensure that the random state is not affected by
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user