Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-01-23 21:54:36 +08:00)
[Core] Change execute_model_with_error_logging() to be a ctx manager (#27060)
Signed-off-by: Nick Hill <nhill@redhat.com>
parent bde9e2272a
commit fe3b9372ad
@@ -290,14 +290,11 @@ class EngineCore:
         # (i.e. client-aborted vs stop criteria met).
         self.scheduler.finish_requests(request_ids, RequestStatus.FINISHED_ABORTED)
 
-    def execute_model_with_error_logging(
-        self,
-        model_fn: Callable[[SchedulerOutput], ModelRunnerOutput],
-        scheduler_output: SchedulerOutput,
-    ) -> ModelRunnerOutput:
+    @contextmanager
+    def log_error_detail(self, scheduler_output: SchedulerOutput):
         """Execute the model and log detailed info on failure."""
         try:
-            return model_fn(scheduler_output)
+            yield
         except Exception as err:
             # We do not want to catch BaseException here since we're only
             # interested in dumping info when the exception is due to an
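For readers unfamiliar with the pattern this hunk introduces: a `@contextmanager` generator runs the caller's `with` body at its `yield`, so a single `try`/`except` around the `yield` covers whatever the caller executes inside the block. A minimal, runnable sketch of the idea (the logging body here is a placeholder, not the real method's):

    from contextlib import contextmanager


    @contextmanager
    def log_error_detail(scheduler_output):
        """Log details of a failed step, then re-raise (placeholder body)."""
        try:
            # The body of the caller's `with` block executes at this yield.
            yield
        except Exception as err:
            # One place to dump scheduler state before re-raising.
            print(f"Step failed for {scheduler_output!r}: {err!r}")
            raise


    # No wrapper callable or lambda is needed, unlike the old
    # execute_model_with_error_logging(model_fn, scheduler_output) shape.
    with log_error_detail("batch-0"):
        model_output = {"tokens": []}  # stands in for executor.execute_model(...)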
@@ -321,15 +318,16 @@ class EngineCore:
         if not self.scheduler.has_requests():
             return {}, False
         scheduler_output = self.scheduler.schedule()
-        model_output = self.execute_model_with_error_logging(
-            self.model_executor.execute_model,  # type: ignore
-            scheduler_output,
-        )
+
+        with self.log_error_detail(scheduler_output):
+            model_output = self.model_executor.execute_model(scheduler_output)
+
+        assert isinstance(model_output, ModelRunnerOutput)
         engine_core_outputs = self.scheduler.update_from_output(
             scheduler_output, model_output
         )
 
-        return (engine_core_outputs, scheduler_output.total_num_scheduled_tokens > 0)
+        return engine_core_outputs, scheduler_output.total_num_scheduled_tokens > 0
 
     def post_step(self, model_executed: bool) -> None:
         if self.use_spec_decode and model_executed:
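The new `assert isinstance(...)` plausibly replaces the old `# type: ignore`: once `model_output` is assigned inside a `with` block from a loosely typed call, the assert both documents the invariant and narrows the type for static checkers. A toy illustration (these types are hypothetical stand-ins, not vLLM's real signatures, and `log_error_detail` is the sketch above):

    from typing import Any


    class ModelRunnerOutput:  # hypothetical stand-in for vLLM's class
        pass


    def execute_model(scheduler_output: str) -> Any:  # deliberately loose type
        return ModelRunnerOutput()


    with log_error_detail("batch-1"):  # context manager from the sketch above
        model_output = execute_model("batch-1")

    # Narrows `model_output` from Any for static checkers and fails fast if
    # the executor ever returns something unexpected.
    assert isinstance(model_output, ModelRunnerOutput)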
@@ -386,14 +384,12 @@ class EngineCore:
         # Block until the next result is available.
         future, scheduler_output = batch_queue.pop()
-        model_output = self.execute_model_with_error_logging(
-            lambda _: future.result(), scheduler_output
-        )
-
+        with self.log_error_detail(scheduler_output):
+            model_output = future.result()
 
         engine_core_outputs = self.scheduler.update_from_output(
             scheduler_output, model_output
         )
 
         return engine_core_outputs, model_executed
 
     def shutdown(self):
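The batch-queue path benefits the same way: `Future.result()` re-raises any exception from the worker at the call site, so wrapping it in the context manager logs failures without the old `lambda _: future.result()` adapter. A runnable toy demonstrating this (reusing the `log_error_detail` sketch above; the pool and names are illustrative):

    from concurrent.futures import ThreadPoolExecutor


    def failing_step():
        raise RuntimeError("boom")


    pool = ThreadPoolExecutor(max_workers=1)
    future = pool.submit(failing_step)

    try:
        # The worker's RuntimeError re-raises at future.result(),
        # inside the with block, so the context manager sees it.
        with log_error_detail("batch-2"):
            model_output = future.result()
    except RuntimeError:
        print("failure was logged by the context manager, then re-raised")
    finally:
        pool.shutdown()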