mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-29 22:47:22 +08:00
Minor
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
a6e5d7b5b7
commit
1260e43230
@ -88,7 +88,7 @@ class BlockTable:
|
||||
# Clear the source row.
|
||||
self.block_table_diff_np[src].fill(0)
|
||||
|
||||
def apply_diff(self, num_reqs: int) -> None:
|
||||
def commit(self, num_reqs: int) -> None:
|
||||
if self.use_uva:
|
||||
# Only copy the diff to the GPU.
|
||||
ops.copy_subranges(
|
||||
@ -103,6 +103,7 @@ class BlockTable:
|
||||
# table is large.
|
||||
self.block_table[:num_reqs].copy_(self.block_table_cpu[:num_reqs],
|
||||
non_blocking=True)
|
||||
self.clear_diff()
|
||||
|
||||
def clear(self) -> None:
|
||||
self.block_table.fill_(0)
|
||||
|
||||
@ -163,9 +163,6 @@ class GPUModelRunner:
|
||||
self.seq_start_loc_np = self.seq_start_loc_cpu.numpy()
|
||||
|
||||
def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
|
||||
# Clean up diffs.
|
||||
self.input_batch.block_table.clear_diff()
|
||||
|
||||
# Remove stopped requests from the cached states.
|
||||
# Keep the states of the pre-empted requests.
|
||||
for req_id in scheduler_output.finished_req_ids:
|
||||
@ -270,7 +267,7 @@ class GPUModelRunner:
|
||||
|
||||
# OPTIMIZATION: Start copying the block table first.
|
||||
# This way, we can overlap the copy with the following CPU operations.
|
||||
self.input_batch.block_table.apply_diff(num_reqs)
|
||||
self.input_batch.block_table.commit(num_reqs)
|
||||
|
||||
# Get the number of scheduled tokens for each request.
|
||||
# TODO: The Python loop can be slow. Optimize.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user