mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-28 22:37:11 +08:00
Merge a8cb4f78c50bcd70ee927cc775f6afe79d562f25 into 254f6b986720c92ddf97fbb1a6a6465da8e87e29
This commit is contained in:
commit
c84b0f1daf
@ -187,6 +187,20 @@ class RemoteOpenAIServer:
|
|||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
# force kill if needed
|
# force kill if needed
|
||||||
self.proc.kill()
|
self.proc.kill()
|
||||||
|
self.__cleanup_gpu_memory()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def __cleanup_gpu_memory():
|
||||||
|
try:
|
||||||
|
if current_platform.is_cuda_alike():
|
||||||
|
num_devices = cuda_device_count_stateless()
|
||||||
|
if num_devices > 0:
|
||||||
|
wait_for_gpu_memory_to_clear(devices=list(
|
||||||
|
range(num_devices)),
|
||||||
|
threshold_ratio=0.05,
|
||||||
|
timeout_s=60)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"GPU cleanup warning: {e}")
|
||||||
|
|
||||||
def _poll(self) -> int | None:
|
def _poll(self) -> int | None:
|
||||||
"""Subclasses override this method to customize process polling"""
|
"""Subclasses override this method to customize process polling"""
|
||||||
@ -310,6 +324,8 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
|
|||||||
# force kill if needed
|
# force kill if needed
|
||||||
self.proc.kill()
|
self.proc.kill()
|
||||||
|
|
||||||
|
self.__cleanup_gpu_memory()
|
||||||
|
|
||||||
|
|
||||||
def _test_completion(
|
def _test_completion(
|
||||||
client: openai.OpenAI,
|
client: openai.OpenAI,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user