fix: use wait_for_gpu_memory_to_clear for GPU memory cleanup

Signed-off-by: AzizCode92 <azizbenothman76@gmail.com>
This commit is contained in:
AzizCode92 2025-09-04 20:35:27 +02:00
parent 19ffe12f32
commit 4c8625fdb1

View File

@ -4,7 +4,6 @@
import asyncio import asyncio
import copy import copy
import functools import functools
import gc
import importlib import importlib
import json import json
import os import os
@ -174,11 +173,10 @@ class RemoteOpenAIServer:
# GPU memory cleanup # GPU memory cleanup
try: try:
if torch.cuda.is_available(): if torch.cuda.is_available():
torch.cuda.empty_cache() devices_to_clear = list(range(torch.cuda.device_count()))
gc.collect() if devices_to_clear:
torch.cuda.synchronize() wait_for_gpu_memory_to_clear(devices=devices_to_clear,
# Small delay to ensure cleanup completes threshold_ratio=0.05)
time.sleep(0.5)
except Exception as e: except Exception as e:
print(f"GPU cleanup warning: {e}") print(f"GPU cleanup warning: {e}")
@ -280,11 +278,10 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
# GPU memory cleaning # GPU memory cleaning
try: try:
if torch.cuda.is_available(): if torch.cuda.is_available():
torch.cuda.empty_cache() devices_to_clear = list(range(torch.cuda.device_count()))
gc.collect() if devices_to_clear:
torch.cuda.synchronize() wait_for_gpu_memory_to_clear(devices=devices_to_clear,
# Small delay to ensure cleanup completes threshold_ratio=0.05)
time.sleep(0.5)
except Exception as e: except Exception as e:
print(f"GPU cleanup warning: {e}") print(f"GPU cleanup warning: {e}")