diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
index 1d5175eb6adc3..d79bce194b713 100644
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -119,10 +119,9 @@ class MultiprocExecutor(Executor):
                     timeout=dequeue_timeout)
 
                 if status != WorkerProc.ResponseStatus.SUCCESS:
-                    if isinstance(result, Exception):
-                        raise result
-                    else:
-                        raise RuntimeError("Worker failed")
+                    raise RuntimeError(
+                        f"Worker failed with error {result}, please check"
+                        " the stack trace above for the root cause")
 
                 responses[w.rank] = result
 
@@ -378,9 +377,11 @@ class WorkerProc:
                 # Notes have been introduced in python 3.11
                 if hasattr(e, "add_note"):
                     e.add_note(traceback.format_exc())
-                self.worker_response_mq.enqueue(
-                    (WorkerProc.ResponseStatus.FAILURE, e))
                 logger.exception("WorkerProc hit an exception: %s", exc_info=e)
+                # The exception might not be serializable, so enqueue only its
+                # string form; the traceback is logged just above.
+                self.worker_response_mq.enqueue(
+                    (WorkerProc.ResponseStatus.FAILURE, str(e)))
                 continue
 
             self.worker_response_mq.enqueue(