From 19ffe12f323fa24defac4cf063ec2687f649802c Mon Sep 17 00:00:00 2001
From: AzizCode92
Date: Thu, 4 Sep 2025 17:50:51 +0200
Subject: [PATCH] [feat]: ensure the GPU memory is cleaned when exiting the
 remote OpenAI server

Signed-off-by: AzizCode92
---
 tests/utils.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/utils.py b/tests/utils.py
index e47235002657d..8eb12e9c4866e 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -4,6 +4,7 @@
 import asyncio
 import copy
 import functools
+import gc
 import importlib
 import json
 import os
@@ -170,6 +171,16 @@ class RemoteOpenAIServer:
         except subprocess.TimeoutExpired:
             # force kill if needed
             self.proc.kill()
+        # GPU memory cleanup
+        try:
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                gc.collect()
+                torch.cuda.synchronize()
+                # Small delay to ensure cleanup completes
+                time.sleep(0.5)
+        except Exception as e:
+            print(f"GPU cleanup warning: {e}")
 
     def _poll(self) -> Optional[int]:
         """Subclasses override this method to customize process polling"""
@@ -266,6 +277,17 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
             # force kill if needed
             self.proc.kill()
 
+        # GPU memory cleanup
+        try:
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                gc.collect()
+                torch.cuda.synchronize()
+                # Small delay to ensure cleanup completes
+                time.sleep(0.5)
+        except Exception as e:
+            print(f"GPU cleanup warning: {e}")
+
 
 def _test_completion(
     client: openai.OpenAI,