mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 03:54:56 +08:00
[BugFix] Fix async scheduling + chunked prefill + preemption (#28787)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
03ee48111d
commit
80b6080ddc
@@ -65,9 +65,8 @@ def test_without_spec_decoding(
|
||||
(True, "mp", True, None, False),
|
||||
(True, "uni", True, None, False),
|
||||
(False, "mp", True, None, True),
|
||||
- # Async scheduling + preemption + chunked prefill needs to be fixed (WIP)
|
||||
- # (True, "mp", True, None, True),
|
||||
- # (True, "uni", True, None, True),
|
||||
+ (True, "mp", True, None, True),
|
||||
+ (True, "uni", True, None, True),
|
||||
]
|
||||
|
||||
run_tests(
|
||||
@@ -103,9 +102,8 @@ def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
|
||||
(False, "mp", True, spec_config_short, True),
|
||||
(True, "uni", True, spec_config, False),
|
||||
(True, "uni", True, spec_config_short, False),
|
||||
- # Async scheduling + preemption + chunked prefill needs to be fixed (WIP)
|
||||
- # (True, "mp", True, spec_config, True),
|
||||
- # (True, "uni", True, spec_config_short, True),
|
||||
+ (True, "mp", True, spec_config, True),
|
||||
+ (True, "uni", True, spec_config_short, True),
|
||||
]
|
||||
|
||||
run_tests(
|
||||
|
||||
@@ -778,9 +778,7 @@ class Scheduler(SchedulerInterface):
|
||||
assert not scheduled_in_prev_step
|
||||
resumed_req_ids.add(req_id)
|
||||
if not scheduled_in_prev_step:
|
||||
- all_token_ids[req_id] = req.all_token_ids[
|
||||
- : req.num_computed_tokens + num_tokens
|
||||
- ]
|
||||
+ all_token_ids[req_id] = req.all_token_ids.copy()
|
||||
new_block_ids.append(
|
||||
req_to_new_blocks[req_id].get_block_ids(allow_none=True)
|
||||
)
|
||||
|
||||
@@ -97,6 +97,9 @@ class ConstantList(Generic[T], Sequence):
|
||||
def __repr__(self):
|
||||
return f"ConstantList({self._x})"
|
||||
|
||||
+ def copy(self) -> list[T]:
|
||||
+ return self._x.copy()
|
||||
|
||||
|
||||
class CpuGpuBuffer:
|
||||
"""Buffer to easily copy tensors between CPU and GPU."""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user