mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-18 07:25:01 +08:00
[Bugfix] fix V1 Engine crash while handling requests with duplicate request id (#15043)
Signed-off-by: Jiahui Sun <jhsun2020@gmail.com>
This commit is contained in:
parent
086b56824c
commit
d8e82bc06d
@ -158,6 +158,22 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
|
|||||||
assert len(engine_core.scheduler.waiting) == 0
|
assert len(engine_core.scheduler.waiting) == 0
|
||||||
assert len(engine_core.scheduler.running) == 0
|
assert len(engine_core.scheduler.running) == 0
|
||||||
|
|
||||||
|
# Sending duplicate requests with same request_id
|
||||||
|
req0 = make_request()
|
||||||
|
req1 = make_request()
|
||||||
|
req0.request_id = req1.request_id = "test"
|
||||||
|
engine_core.add_request(req0)
|
||||||
|
|
||||||
|
while len(engine_core.step().outputs) > 0:
|
||||||
|
pass
|
||||||
|
|
||||||
|
engine_core.add_request(req1)
|
||||||
|
while len(engine_core.step().outputs) > 0:
|
||||||
|
pass
|
||||||
|
|
||||||
|
assert len(engine_core.scheduler.waiting) == 0
|
||||||
|
assert len(engine_core.scheduler.running) == 0
|
||||||
|
|
||||||
|
|
||||||
@create_new_process_for_each_test()
|
@create_new_process_for_each_test()
|
||||||
def test_engine_core_advanced_sampling(monkeypatch: pytest.MonkeyPatch):
|
def test_engine_core_advanced_sampling(monkeypatch: pytest.MonkeyPatch):
|
||||||
|
|||||||
@ -179,16 +179,6 @@ class EngineCore:
|
|||||||
scheduler_stats=self.scheduler.make_stats(),
|
scheduler_stats=self.scheduler.make_stats(),
|
||||||
)
|
)
|
||||||
scheduler_output = self.scheduler.schedule()
|
scheduler_output = self.scheduler.schedule()
|
||||||
|
|
||||||
# This case may occur when the only unfinished requests are
|
|
||||||
# structured output requests where the grammar has not finished
|
|
||||||
# compiling yet, so there's nothing to run.
|
|
||||||
if scheduler_output.total_num_scheduled_tokens == 0:
|
|
||||||
return EngineCoreOutputs(
|
|
||||||
outputs=[],
|
|
||||||
scheduler_stats=self.scheduler.make_stats(),
|
|
||||||
)
|
|
||||||
|
|
||||||
output = self.model_executor.execute_model(scheduler_output)
|
output = self.model_executor.execute_model(scheduler_output)
|
||||||
engine_core_outputs = self.scheduler.update_from_output(
|
engine_core_outputs = self.scheduler.update_from_output(
|
||||||
scheduler_output, output) # type: ignore
|
scheduler_output, output) # type: ignore
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user