mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-11 06:25:51 +08:00
[feat]: ensure GPU memory is cleaned up when exiting the remote OpenAI server
Signed-off-by: AzizCode92 <azizbenothman76@gmail.com>
This commit is contained in:
parent
e41a0fa377
commit
19ffe12f32
@ -4,6 +4,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import copy
|
import copy
|
||||||
import functools
|
import functools
|
||||||
|
import gc
|
||||||
import importlib
|
import importlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
@ -170,6 +171,16 @@ class RemoteOpenAIServer:
|
|||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
# force kill if needed
|
# force kill if needed
|
||||||
self.proc.kill()
|
self.proc.kill()
|
||||||
|
# GPU memory cleanup
|
||||||
|
try:
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
torch.cuda.empty_cache()
|
||||||
|
gc.collect()
|
||||||
|
torch.cuda.synchronize()
|
||||||
|
# Small delay to ensure cleanup completes
|
||||||
|
time.sleep(0.5)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"GPU cleanup warning: {e}")
|
||||||
|
|
||||||
def _poll(self) -> Optional[int]:
|
def _poll(self) -> Optional[int]:
|
||||||
"""Subclasses override this method to customize process polling"""
|
"""Subclasses override this method to customize process polling"""
|
||||||
@ -266,6 +277,17 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
|
|||||||
# force kill if needed
|
# force kill if needed
|
||||||
self.proc.kill()
|
self.proc.kill()
|
||||||
|
|
||||||
|
# GPU memory cleanup
|
||||||
|
try:
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
torch.cuda.empty_cache()
|
||||||
|
gc.collect()
|
||||||
|
torch.cuda.synchronize()
|
||||||
|
# Small delay to ensure cleanup completes
|
||||||
|
time.sleep(0.5)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"GPU cleanup warning: {e}")
|
||||||
|
|
||||||
|
|
||||||
def _test_completion(
|
def _test_completion(
|
||||||
client: openai.OpenAI,
|
client: openai.OpenAI,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user