diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index c936646aa7993..807db8275fbf5 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -268,7 +268,8 @@ class InprocClient(EngineCoreClient):
         self.engine_core = EngineCore(*args, **kwargs)
 
     def get_output(self) -> EngineCoreOutputs:
-        outputs, _ = self.engine_core.step_fn()
+        outputs, model_executed = self.engine_core.step_fn()
+        self.engine_core.post_step(model_executed=model_executed)
         return outputs and outputs.get(0) or EngineCoreOutputs()
 
     def get_supported_tasks(self) -> tuple[SupportedTask, ...]: