From fe3b9372adfe968360c6754c15058380ab5e87f1 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Thu, 16 Oct 2025 20:45:32 -0700
Subject: [PATCH] [Core] Change `execute_model_with_error_logging()` to be a
 ctx manager (#27060)

Signed-off-by: Nick Hill
---
 vllm/v1/engine/core.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 0ca60ce5cf9ad..2773dc61ff3d7 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -290,14 +290,11 @@ class EngineCore:
         # (i.e. client-aborted vs stop criteria met).
         self.scheduler.finish_requests(request_ids, RequestStatus.FINISHED_ABORTED)
 
-    def execute_model_with_error_logging(
-        self,
-        model_fn: Callable[[SchedulerOutput], ModelRunnerOutput],
-        scheduler_output: SchedulerOutput,
-    ) -> ModelRunnerOutput:
+    @contextmanager
+    def log_error_detail(self, scheduler_output: SchedulerOutput):
         """Execute the model and log detailed info on failure."""
         try:
-            return model_fn(scheduler_output)
+            yield
         except Exception as err:
             # We do not want to catch BaseException here since we're only
             # interested in dumping info when the exception is due to an
@@ -321,15 +318,16 @@ class EngineCore:
         if not self.scheduler.has_requests():
             return {}, False
         scheduler_output = self.scheduler.schedule()
-        model_output = self.execute_model_with_error_logging(
-            self.model_executor.execute_model,  # type: ignore
-            scheduler_output,
-        )
+
+        with self.log_error_detail(scheduler_output):
+            model_output = self.model_executor.execute_model(scheduler_output)
+
+        assert isinstance(model_output, ModelRunnerOutput)
 
         engine_core_outputs = self.scheduler.update_from_output(
             scheduler_output, model_output
         )
-        return (engine_core_outputs, scheduler_output.total_num_scheduled_tokens > 0)
+        return engine_core_outputs, scheduler_output.total_num_scheduled_tokens > 0
 
     def post_step(self, model_executed: bool) -> None:
         if self.use_spec_decode and model_executed:
@@ -386,14 +384,12 @@ class EngineCore:
 
         # Block until the next result is available.
         future, scheduler_output = batch_queue.pop()
-        model_output = self.execute_model_with_error_logging(
-            lambda _: future.result(), scheduler_output
-        )
+        with self.log_error_detail(scheduler_output):
+            model_output = future.result()
 
         engine_core_outputs = self.scheduler.update_from_output(
             scheduler_output, model_output
         )
-
         return engine_core_outputs, model_executed
 
     def shutdown(self):
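
Illustrative sketch (not part of the patch; trailing text after the last hunk
is ignored by git apply): the same refactor in miniature, showing why the
callback-based wrapper becomes a context manager. The names `run_model` and
the dict-based scheduler state below are hypothetical stand-ins for
`self.model_executor.execute_model` and `SchedulerOutput`; only the
try/yield/except shape mirrors the patch.

    from contextlib import contextmanager


    @contextmanager
    def log_error_detail(scheduler_state):
        """Log scheduler state if the wrapped block raises, then re-raise."""
        try:
            yield
        except Exception:
            # Catch Exception rather than BaseException so interpreter-level
            # signals (KeyboardInterrupt, SystemExit) propagate untouched.
            print(f"model execution failed; scheduler state: {scheduler_state!r}")
            raise


    def run_model(scheduler_state):
        # Hypothetical stand-in for the model executor; fails on purpose.
        raise RuntimeError("simulated device error")


    if __name__ == "__main__":
        state = {"total_num_scheduled_tokens": 128}
        try:
            # Callers wrap arbitrary statements: a direct call here, a bare
            # future.result() in the patch's batch-queue path. The old
            # callable-based API needed a `lambda _: future.result()` adapter
            # for that second case.
            with log_error_detail(state):
                model_output = run_model(state)
        except RuntimeError:
            pass  # error details were already logged by the context manager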