mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-26 22:09:30 +08:00
[feat]: ensure the GPU memory is cleaned when exiting the remote OpenAI server
Signed-off-by: AzizCode92 <azizbenothman76@gmail.com>
This commit is contained in:
parent
e41a0fa377
commit
19ffe12f32
@ -4,6 +4,7 @@
|
||||
import asyncio
|
||||
import copy
|
||||
import functools
|
||||
import gc
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
@ -170,6 +171,16 @@ class RemoteOpenAIServer:
|
||||
except subprocess.TimeoutExpired:
|
||||
# force kill if needed
|
||||
self.proc.kill()
|
||||
# GPU memory cleanup
|
||||
try:
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
gc.collect()
|
||||
torch.cuda.synchronize()
|
||||
# Small delay to ensure cleanup completes
|
||||
time.sleep(0.5)
|
||||
except Exception as e:
|
||||
print(f"GPU cleanup warning: {e}")
|
||||
|
||||
def _poll(self) -> Optional[int]:
|
||||
"""Subclasses override this method to customize process polling"""
|
||||
@ -266,6 +277,17 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
|
||||
# force kill if needed
|
||||
self.proc.kill()
|
||||
|
||||
# GPU memory cleaning
|
||||
try:
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
gc.collect()
|
||||
torch.cuda.synchronize()
|
||||
# Small delay to ensure cleanup completes
|
||||
time.sleep(0.5)
|
||||
except Exception as e:
|
||||
print(f"GPU cleanup warning: {e}")
|
||||
|
||||
|
||||
def _test_completion(
|
||||
client: openai.OpenAI,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user