mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-28 04:17:03 +08:00
fix: use wait_for_gpu_memory_to_clear to clear gpu memory
Signed-off-by: AzizCode92 <azizbenothman76@gmail.com>
This commit is contained in:
parent
19ffe12f32
commit
4c8625fdb1
@ -4,7 +4,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import copy
|
import copy
|
||||||
import functools
|
import functools
|
||||||
import gc
|
|
||||||
import importlib
|
import importlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
@ -174,11 +173,10 @@ class RemoteOpenAIServer:
|
|||||||
# GPU memory cleanup
|
# GPU memory cleanup
|
||||||
try:
|
try:
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
torch.cuda.empty_cache()
|
devices_to_clear = list(range(torch.cuda.device_count()))
|
||||||
gc.collect()
|
if devices_to_clear:
|
||||||
torch.cuda.synchronize()
|
wait_for_gpu_memory_to_clear(devices=devices_to_clear,
|
||||||
# Small delay to ensure cleanup completes
|
threshold_ratio=0.05)
|
||||||
time.sleep(0.5)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"GPU cleanup warning: {e}")
|
print(f"GPU cleanup warning: {e}")
|
||||||
|
|
||||||
@ -280,11 +278,10 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
|
|||||||
# GPU memory cleaning
|
# GPU memory cleaning
|
||||||
try:
|
try:
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
torch.cuda.empty_cache()
|
devices_to_clear = list(range(torch.cuda.device_count()))
|
||||||
gc.collect()
|
if devices_to_clear:
|
||||||
torch.cuda.synchronize()
|
wait_for_gpu_memory_to_clear(devices=devices_to_clear,
|
||||||
# Small delay to ensure cleanup completes
|
threshold_ratio=0.05)
|
||||||
time.sleep(0.5)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"GPU cleanup warning: {e}")
|
print(f"GPU cleanup warning: {e}")
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user