From 80b6080ddcad0653daa6b776eb71a5a7029b70d8 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Sun, 16 Nov 2025 14:46:46 -0800 Subject: [PATCH] [BugFix] Fix async scheduling + chunked prefill + preemption (#28787) Signed-off-by: Nick Hill --- tests/v1/e2e/test_async_scheduling.py | 10 ++++------ vllm/v1/core/sched/scheduler.py | 4 +--- vllm/v1/utils.py | 3 +++ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/v1/e2e/test_async_scheduling.py b/tests/v1/e2e/test_async_scheduling.py index dbe403ece051..c4aca82416cd 100644 --- a/tests/v1/e2e/test_async_scheduling.py +++ b/tests/v1/e2e/test_async_scheduling.py @@ -65,9 +65,8 @@ def test_without_spec_decoding( (True, "mp", True, None, False), (True, "uni", True, None, False), (False, "mp", True, None, True), - # Async scheduling + preemption + chunked prefill needs to be fixed (WIP) - # (True, "mp", True, None, True), - # (True, "uni", True, None, True), + (True, "mp", True, None, True), + (True, "uni", True, None, True), ] run_tests( @@ -103,9 +102,8 @@ def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch): (False, "mp", True, spec_config_short, True), (True, "uni", True, spec_config, False), (True, "uni", True, spec_config_short, False), - # Async scheduling + preemption + chunked prefill needs to be fixed (WIP) - # (True, "mp", True, spec_config, True), - # (True, "uni", True, spec_config_short, True), + (True, "mp", True, spec_config, True), + (True, "uni", True, spec_config_short, True), ] run_tests( diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index bc15979dea62..8e62542337a7 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -778,9 +778,7 @@ class Scheduler(SchedulerInterface): assert not scheduled_in_prev_step resumed_req_ids.add(req_id) if not scheduled_in_prev_step: - all_token_ids[req_id] = req.all_token_ids[ - : req.num_computed_tokens + num_tokens - ] + all_token_ids[req_id] = req.all_token_ids.copy() new_block_ids.append( req_to_new_blocks[req_id].get_block_ids(allow_none=True) ) diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py index a401f6d74cdd..29099d1e9b17 100644 --- a/vllm/v1/utils.py +++ b/vllm/v1/utils.py @@ -97,6 +97,9 @@ class ConstantList(Generic[T], Sequence): def __repr__(self): return f"ConstantList({self._x})" + def copy(self) -> list[T]: + return self._x.copy() + class CpuGpuBuffer: """Buffer to easily copy tensors between CPU and GPU."""