Dump input metadata on crash for async scheduling (#21258)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-12-13 12:34:57 +08:00 · 2025-07-23 21:10:30 -07:00 · 2025-07-23 21:10:30 -07:00 · dc2f159f8a
commit dc2f159f8a
parent d5b981f8b1
1 changed files with 14 additions and 4 deletions
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@ -234,9 +234,14 @@ class EngineCore:
        self.scheduler.finish_requests(request_ids,
                                       RequestStatus.FINISHED_ABORTED)
-    def execute_model(self, scheduler_output: SchedulerOutput):
+    def execute_model_with_error_logging(
        self,
        model_fn: Callable[[SchedulerOutput], ModelRunnerOutput],
        scheduler_output: SchedulerOutput,
    ) -> ModelRunnerOutput:
        """Execute the model and log detailed info on failure."""
        try:
-            return self.model_executor.execute_model(scheduler_output)
+            return model_fn(scheduler_output)
        except Exception as err:
            # We do not want to catch BaseException here since we're only
            # interested in dumping info when the exception is due to an
@ -259,7 +264,9 @@ class EngineCore:
        if not self.scheduler.has_requests():
            return {}, False
        scheduler_output = self.scheduler.schedule()
-        model_output = self.execute_model(scheduler_output)
+        model_output = self.execute_model_with_error_logging(
            self.model_executor.execute_model,  # type: ignore
            scheduler_output)
        engine_core_outputs = self.scheduler.update_from_output(
            scheduler_output, model_output)  # type: ignore
@ -306,8 +313,11 @@ class EngineCore:
        # so we need more work.
        if not scheduled_batch and not self.batch_queue.empty():
            future, scheduler_output = self.batch_queue.get_nowait()
            # Blocking until the first result is available.
-            model_output = future.result()
+            model_output = self.execute_model_with_error_logging(
                lambda _: future.result(), scheduler_output)
            self.batch_queue.task_done()
            engine_core_outputs = (self.scheduler.update_from_output(
                scheduler_output, model_output))