From 95592fa00a01c1e779137f72814c8091736e1c86 Mon Sep 17 00:00:00 2001
From: Roy
Date: Tue, 19 Sep 2023 02:49:10 +0800
Subject: [PATCH] align llm_engine and async_engine. (#1081)

---
 vllm/engine/async_llm_engine.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 40ac475228476..94674e9e6d8b8 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -183,10 +183,9 @@ class _AsyncLLMEngine(LLMEngine):
         and updates the scheduler with the model outputs. Finally, it decodes
         the sequences and returns the newly generated results.
         """
-        (seq_group_metadata_list, scheduler_outputs,
-         early_return) = self._schedule()
-        if early_return is not None:
-            return early_return
+        seq_group_metadata_list, scheduler_outputs, ignored = self._schedule()
+        if scheduler_outputs.is_empty():
+            return ignored
 
         # Execute the model.
         output = await self._run_workers_async(
@@ -197,7 +196,7 @@ class _AsyncLLMEngine(LLMEngine):
             blocks_to_copy=scheduler_outputs.blocks_to_copy,
         )
 
-        return self._process_model_outputs(output, scheduler_outputs)
+        return self._process_model_outputs(output, scheduler_outputs) + ignored
 
     async def _run_workers_async(
         self,