From 6a9c583e73c75c8eab10a9c607cb096750b751a0 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Mon, 18 Mar 2024 21:06:23 -0700
Subject: [PATCH] [Core] print error before deadlock (#3459)

---
 vllm/engine/ray_utils.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/vllm/engine/ray_utils.py b/vllm/engine/ray_utils.py
index 742f3dc57519..27414f085b45 100644
--- a/vllm/engine/ray_utils.py
+++ b/vllm/engine/ray_utils.py
@@ -33,8 +33,17 @@ try:
             return getattr(self.worker, name)
 
         def execute_method(self, method, *args, **kwargs):
-            executor = getattr(self, method)
-            return executor(*args, **kwargs)
+            try:
+                executor = getattr(self, method)
+                return executor(*args, **kwargs)
+            except Exception as e:
+                # exceptions in ray worker may cause deadlock
+                # see https://github.com/vllm-project/vllm/issues/3455
+                # print the error and inform the user to solve the error
+                msg = (f"Error executing method {method}. "
+                       "This might cause deadlock in distributed execution.")
+                logger.exception(msg)
+                raise e
 
         def get_node_ip(self) -> str:
             return get_ip()