mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 10:34:27 +08:00
[BugFix] kv_offloading: Fix bug in loading of partial cpu blocks (#28951)
Signed-off-by: Or Ozeri <oro@il.ibm.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
parent
06c20c9904
commit
c0c2dd1e0b
@@ -103,8 +103,8 @@ def test_transfer(
|
||||
for i in range(gpu_blocks_per_cpu_block):
|
||||
cpu_blocks_in_gpu_block_size.append(i + base_block_id)
|
||||
|
||||
# maybe skip a GPU block to test writing to the middle of a CPU block
|
||||
if gpu_to_cpu:
|
||||
# maybe skip a GPU block to test reading from the middle of a CPU block
|
||||
if not gpu_to_cpu:
|
||||
gpu_blocks = gpu_blocks[gpu_blocks_per_cpu_block - 1 :]
|
||||
cpu_blocks_in_gpu_block_size = cpu_blocks_in_gpu_block_size[
|
||||
gpu_blocks_per_cpu_block - 1 :
|
||||
|
||||
@@ -135,22 +135,20 @@ class CpuGpuOffloadingHandler(OffloadingHandler):
|
||||
assert src_blocks.ndim == 1
|
||||
assert dst_blocks.ndim == 1
|
||||
|
||||
dst_sub_blocks_to_skip = -src_blocks.size % dst_block_size_factor
|
||||
src_sub_block_count = src_blocks.size * src_block_size_factor
|
||||
dst_sub_block_count = dst_blocks.size * dst_block_size_factor
|
||||
src_sub_blocks_to_skip = -dst_blocks.size % src_block_size_factor
|
||||
|
||||
assert (
|
||||
src_sub_block_count
|
||||
== dst_blocks.size * dst_block_size_factor - dst_sub_blocks_to_skip
|
||||
)
|
||||
assert dst_sub_block_count == src_sub_block_count - src_sub_blocks_to_skip
|
||||
|
||||
src_to_dst = np.empty((src_sub_block_count, 2), dtype=np.int64)
|
||||
expand_block_ids(src_blocks, src_block_size_factor, src_to_dst[:, 0])
|
||||
src_to_dst = np.empty((dst_sub_block_count, 2), dtype=np.int64)
|
||||
expand_block_ids(
|
||||
dst_blocks,
|
||||
dst_block_size_factor,
|
||||
src_to_dst[:, 1],
|
||||
skip_count=dst_sub_blocks_to_skip,
|
||||
src_blocks,
|
||||
src_block_size_factor,
|
||||
src_to_dst[:, 0],
|
||||
skip_count=src_sub_blocks_to_skip,
|
||||
)
|
||||
expand_block_ids(dst_blocks, dst_block_size_factor, src_to_dst[:, 1])
|
||||
src_to_dst_tensor = torch.from_numpy(src_to_dst)
|
||||
|
||||
event = self.events_pool.pop() if self.events_pool else torch.Event()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user