[Bugfix] fix V1 Engine crash while handling requests with duplicate request id (#15043)

Signed-off-by: Jiahui Sun <jhsun2020@gmail.com>
2025-12-16 11:16:45 +08:00 · 2025-03-21 01:01:02 +08:00 · 2025-03-21 01:01:02 +08:00 · d8e82bc06d
commit d8e82bc06d
parent 086b56824c
2 changed files with 16 additions and 10 deletions
--- a/tests/v1/engine/test_engine_core.py
+++ b/tests/v1/engine/test_engine_core.py
@@ -158,6 +158,22 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
        assert len(engine_core.scheduler.waiting) == 0
        assert len(engine_core.scheduler.running) == 0

+        # Sending duplicate requests with same request_id
+        req0 = make_request()
+        req1 = make_request()
+        req0.request_id = req1.request_id = "test"
+        engine_core.add_request(req0)
+
+        while len(engine_core.step().outputs) > 0:
+            pass
+
+        engine_core.add_request(req1)
+        while len(engine_core.step().outputs) > 0:
+            pass
+
+        assert len(engine_core.scheduler.waiting) == 0
+        assert len(engine_core.scheduler.running) == 0
+

@create_new_process_for_each_test()
 def test_engine_core_advanced_sampling(monkeypatch: pytest.MonkeyPatch):
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -179,16 +179,6 @@ class EngineCore:
                scheduler_stats=self.scheduler.make_stats(),
            )
        scheduler_output = self.scheduler.schedule()
-
-        # This case may occur when the only unfinished requests are
-        # structured output requests where the grammar has not finished
-        # compiling yet, so there's nothing to run.
-        if scheduler_output.total_num_scheduled_tokens == 0:
-            return EngineCoreOutputs(
-                outputs=[],
-                scheduler_stats=self.scheduler.make_stats(),
-            )
-
        output = self.model_executor.execute_model(scheduler_output)
        engine_core_outputs = self.scheduler.update_from_output(
            scheduler_output, output)  # type: ignore