diff --git a/tests/v1/engine/test_engine_core.py b/tests/v1/engine/test_engine_core.py index afbe15b9d46e3..ca5ff8fa84544 100644 --- a/tests/v1/engine/test_engine_core.py +++ b/tests/v1/engine/test_engine_core.py @@ -158,6 +158,22 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch): assert len(engine_core.scheduler.waiting) == 0 assert len(engine_core.scheduler.running) == 0 + # Sending duplicate requests with same request_id + req0 = make_request() + req1 = make_request() + req0.request_id = req1.request_id = "test" + engine_core.add_request(req0) + + while len(engine_core.step().outputs) > 0: + pass + + engine_core.add_request(req1) + while len(engine_core.step().outputs) > 0: + pass + + assert len(engine_core.scheduler.waiting) == 0 + assert len(engine_core.scheduler.running) == 0 + @create_new_process_for_each_test() def test_engine_core_advanced_sampling(monkeypatch: pytest.MonkeyPatch): diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 8f93d3c71cdf3..b0c18aee97c28 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -179,16 +179,6 @@ class EngineCore: scheduler_stats=self.scheduler.make_stats(), ) scheduler_output = self.scheduler.schedule() - - # This case may occur when the only unfinished requests are - # structured output requests where the grammar has not finished - # compiling yet, so there's nothing to run. - if scheduler_output.total_num_scheduled_tokens == 0: - return EngineCoreOutputs( - outputs=[], - scheduler_stats=self.scheduler.make_stats(), - ) - output = self.model_executor.execute_model(scheduler_output) engine_core_outputs = self.scheduler.update_from_output( scheduler_output, output) # type: ignore