Merge a8cb4f78c50bcd70ee927cc775f6afe79d562f25 into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

This commit is contained in:
Aziz 2025-12-25 08:09:22 +08:00 committed by GitHub
commit c84b0f1daf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -187,6 +187,20 @@ class RemoteOpenAIServer:
except subprocess.TimeoutExpired:
# force kill if needed
self.proc.kill()
self.__cleanup_gpu_memory()
@staticmethod
def _cleanup_gpu_memory():
    """Best-effort GPU memory check used by the server shutdown path.

    When ``current_platform.is_cuda_alike()`` is true and
    ``cuda_device_count_stateless()`` reports at least one device, this
    calls ``wait_for_gpu_memory_to_clear`` for every device index with
    ``threshold_ratio=0.05`` and ``timeout_s=60`` (presumably blocking
    until usage drops below the threshold -- confirm against the helper).

    Any ``Exception`` raised along the way is caught and printed as a
    warning, so a failed cleanup never propagates to the caller.

    The helper is named with a single leading underscore on purpose:
    a double-underscore name is mangled per class, so
    ``self.__cleanup_gpu_memory()`` written inside a subclass such as
    ``RemoteOpenAIServerCustom`` resolves to
    ``_RemoteOpenAIServerCustom__cleanup_gpu_memory`` and raises
    ``AttributeError``. Subclasses should call
    ``self._cleanup_gpu_memory()`` instead.
    """
    try:
        if not current_platform.is_cuda_alike():
            return
        num_devices = cuda_device_count_stateless()
        if num_devices > 0:
            wait_for_gpu_memory_to_clear(
                devices=list(range(num_devices)),
                threshold_ratio=0.05,
                timeout_s=60,
            )
    except Exception as e:
        # Cleanup is advisory: report the problem and carry on.
        print(f"GPU cleanup warning: {e}")

# Backward-compatible alias. Inside the class body this name is mangled to
# ``_RemoteOpenAIServer__cleanup_gpu_memory``, which keeps the existing
# ``self.__cleanup_gpu_memory()`` call in this class working.
__cleanup_gpu_memory = _cleanup_gpu_memory
def _poll(self) -> int | None:
"""Subclasses override this method to customize process polling"""
@@ -310,6 +324,8 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
# force kill if needed
self.proc.kill()
self.__cleanup_gpu_memory()
def _test_completion(
client: openai.OpenAI,