From 9ea82ecd25cb97f3aa11f8339c950a0ee93d8792 Mon Sep 17 00:00:00 2001
From: Nikhil G
Date: Fri, 3 Oct 2025 11:39:45 -0700
Subject: [PATCH] Fix V1 engine serialization error with Ray distributed executor (#26148)

Signed-off-by: Nikhil Ghosh
Signed-off-by: yewentao256
---
 vllm/executor/ray_utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py
index d8eb7977dbde..5b76334722e9 100644
--- a/vllm/executor/ray_utils.py
+++ b/vllm/executor/ray_utils.py
@@ -16,6 +16,7 @@ from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.sequence import ExecuteModelRequest, IntermediateTensors
 from vllm.utils import get_ip
+from vllm.v1.outputs import AsyncModelRunnerOutput
 from vllm.v1.worker.worker_base import WorkerWrapperBase
 
 if TYPE_CHECKING:
@@ -142,6 +143,11 @@ try:
                 # but may still be finished requests.
                 assert not output or not output.req_ids
                 output = scheduler_output, None
+            # Ensure outputs crossing Ray compiled DAG are serializable.
+            # AsyncModelRunnerOutput holds CUDA events and cannot be
+            # pickled.
+            if isinstance(output, AsyncModelRunnerOutput):
+                output = output.get_output()
             return output
 
         def override_env_vars(self, vars: Dict[str, str]):