fix: use wait_for_gpu_memory_to_clear to wait for GPU memory to be released

Signed-off-by: AzizCode92 <azizbenothman76@gmail.com>
2026-05-11 06:25:51 +08:00 · 2025-09-04 20:35:27 +02:00 · 2025-09-04 20:35:27 +02:00 · 4c8625fdb1
commit 4c8625fdb1
parent 19ffe12f32
1 changed files with 8 additions and 11 deletions
--- a/tests/utils.py
+++ b/tests/utils.py
@ -4,7 +4,6 @@
 import asyncio
 import copy
 import functools
-import gc
 import importlib
 import json
 import os
@ -174,11 +173,10 @@ class RemoteOpenAIServer:
        # GPU memory cleanup
        try:
            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-                gc.collect()
-                torch.cuda.synchronize()
-                # Small delay to ensure cleanup completes
-                time.sleep(0.5)
+                devices_to_clear = list(range(torch.cuda.device_count()))
+                if devices_to_clear:
+                    wait_for_gpu_memory_to_clear(devices=devices_to_clear,
+                                                 threshold_ratio=0.05)
        except Exception as e:
            print(f"GPU cleanup warning: {e}")

@ -280,11 +278,10 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
        # GPU memory cleaning
        try:
            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-                gc.collect()
-                torch.cuda.synchronize()
-                # Small delay to ensure cleanup completes
-                time.sleep(0.5)
+                devices_to_clear = list(range(torch.cuda.device_count()))
+                if devices_to_clear:
+                    wait_for_gpu_memory_to_clear(devices=devices_to_clear,
+                                                 threshold_ratio=0.05)
        except Exception as e:
            print(f"GPU cleanup warning: {e}")