diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index f3c8d69e4efe9..93bf8793dae33 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -225,11 +225,11 @@ class _AsyncLLMEngine(LLMEngine):
         """
         seq_group_metadata_list, scheduler_outputs = self.scheduler[
             virtual_engine].schedule()
-        finished_requests_ids = self.scheduler[
-            virtual_engine].get_and_reset_finished_requests_ids()
 
         if not scheduler_outputs.is_empty():
             # Execute the model.
+            finished_requests_ids = self.scheduler[
+                virtual_engine].get_and_reset_finished_requests_ids()
             execute_model_req = ExecuteModelRequest(
                 seq_group_metadata_list=seq_group_metadata_list,
                 blocks_to_swap_in=scheduler_outputs.blocks_to_swap_in,
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index b476594fc73f6..d354218cf16ea 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -871,10 +871,10 @@ class LLMEngine:
                 "as performance will be severely degraded otherwise.")
         seq_group_metadata_list, scheduler_outputs = self.scheduler[
             0].schedule()
-        finished_requests_ids = self.scheduler[
-            0].get_and_reset_finished_requests_ids()
 
         if not scheduler_outputs.is_empty():
+            finished_requests_ids = self.scheduler[
+                0].get_and_reset_finished_requests_ids()
             execute_model_req = ExecuteModelRequest(
                 seq_group_metadata_list=seq_group_metadata_list,
                 blocks_to_swap_in=scheduler_outputs.blocks_to_swap_in,