[Core] Change execute_model_with_error_logging() to be a ctx manager (#27060)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill 2025-10-16 20:45:32 -07:00 committed by GitHub
parent bde9e2272a
commit fe3b9372ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -290,14 +290,11 @@ class EngineCore:
# (i.e. client-aborted vs stop criteria met). # (i.e. client-aborted vs stop criteria met).
self.scheduler.finish_requests(request_ids, RequestStatus.FINISHED_ABORTED) self.scheduler.finish_requests(request_ids, RequestStatus.FINISHED_ABORTED)
def execute_model_with_error_logging( @contextmanager
self, def log_error_detail(self, scheduler_output: SchedulerOutput):
model_fn: Callable[[SchedulerOutput], ModelRunnerOutput],
scheduler_output: SchedulerOutput,
) -> ModelRunnerOutput:
"""Execute the model and log detailed info on failure.""" """Execute the model and log detailed info on failure."""
try: try:
return model_fn(scheduler_output) yield
except Exception as err: except Exception as err:
# We do not want to catch BaseException here since we're only # We do not want to catch BaseException here since we're only
# interested in dumping info when the exception is due to an # interested in dumping info when the exception is due to an
@ -321,15 +318,16 @@ class EngineCore:
if not self.scheduler.has_requests(): if not self.scheduler.has_requests():
return {}, False return {}, False
scheduler_output = self.scheduler.schedule() scheduler_output = self.scheduler.schedule()
model_output = self.execute_model_with_error_logging(
self.model_executor.execute_model, # type: ignore with self.log_error_detail(scheduler_output):
scheduler_output, model_output = self.model_executor.execute_model(scheduler_output)
)
assert isinstance(model_output, ModelRunnerOutput)
engine_core_outputs = self.scheduler.update_from_output( engine_core_outputs = self.scheduler.update_from_output(
scheduler_output, model_output scheduler_output, model_output
) )
return (engine_core_outputs, scheduler_output.total_num_scheduled_tokens > 0) return engine_core_outputs, scheduler_output.total_num_scheduled_tokens > 0
def post_step(self, model_executed: bool) -> None: def post_step(self, model_executed: bool) -> None:
if self.use_spec_decode and model_executed: if self.use_spec_decode and model_executed:
@ -386,14 +384,12 @@ class EngineCore:
# Block until the next result is available. # Block until the next result is available.
future, scheduler_output = batch_queue.pop() future, scheduler_output = batch_queue.pop()
model_output = self.execute_model_with_error_logging( with self.log_error_detail(scheduler_output):
lambda _: future.result(), scheduler_output model_output = future.result()
)
engine_core_outputs = self.scheduler.update_from_output( engine_core_outputs = self.scheduler.update_from_output(
scheduler_output, model_output scheduler_output, model_output
) )
return engine_core_outputs, model_executed return engine_core_outputs, model_executed
def shutdown(self): def shutdown(self):