From c0c2dd1e0b75c70706f4d8dbcd1d75f1c1750e14 Mon Sep 17 00:00:00 2001
From: Or Ozeri
Date: Thu, 20 Nov 2025 12:55:10 +0200
Subject: [PATCH] [BugFix] kv_offloading: Fix bug in loading of partial cpu
 blocks (#28951)

Signed-off-by: Or Ozeri
Co-authored-by: Cyrus Leung
---
 tests/v1/kv_offload/test_cpu_gpu.py  |  4 ++--
 vllm/v1/kv_offload/worker/cpu_gpu.py | 20 +++++++++-----------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/tests/v1/kv_offload/test_cpu_gpu.py b/tests/v1/kv_offload/test_cpu_gpu.py
index 0d4fa344d298c..a248104e16d2d 100644
--- a/tests/v1/kv_offload/test_cpu_gpu.py
+++ b/tests/v1/kv_offload/test_cpu_gpu.py
@@ -103,8 +103,8 @@ def test_transfer(
         for i in range(gpu_blocks_per_cpu_block):
             cpu_blocks_in_gpu_block_size.append(i + base_block_id)
 
-    # maybe skip a GPU block to test writing to the middle of a CPU block
-    if gpu_to_cpu:
+    # maybe skip a GPU block to test reading from the middle of a CPU block
+    if not gpu_to_cpu:
         gpu_blocks = gpu_blocks[gpu_blocks_per_cpu_block - 1 :]
         cpu_blocks_in_gpu_block_size = cpu_blocks_in_gpu_block_size[
             gpu_blocks_per_cpu_block - 1 :
diff --git a/vllm/v1/kv_offload/worker/cpu_gpu.py b/vllm/v1/kv_offload/worker/cpu_gpu.py
index 0f2ec4a1b41f3..111046377a5da 100644
--- a/vllm/v1/kv_offload/worker/cpu_gpu.py
+++ b/vllm/v1/kv_offload/worker/cpu_gpu.py
@@ -135,22 +135,20 @@ class CpuGpuOffloadingHandler(OffloadingHandler):
         assert src_blocks.ndim == 1
         assert dst_blocks.ndim == 1
 
-        dst_sub_blocks_to_skip = -src_blocks.size % dst_block_size_factor
         src_sub_block_count = src_blocks.size * src_block_size_factor
+        dst_sub_block_count = dst_blocks.size * dst_block_size_factor
+        src_sub_blocks_to_skip = -dst_blocks.size % src_block_size_factor
 
-        assert (
-            src_sub_block_count
-            == dst_blocks.size * dst_block_size_factor - dst_sub_blocks_to_skip
-        )
+        assert dst_sub_block_count == src_sub_block_count - src_sub_blocks_to_skip
 
-        src_to_dst = np.empty((src_sub_block_count, 2), dtype=np.int64)
-        expand_block_ids(src_blocks, src_block_size_factor, src_to_dst[:, 0])
+        src_to_dst = np.empty((dst_sub_block_count, 2), dtype=np.int64)
         expand_block_ids(
-            dst_blocks,
-            dst_block_size_factor,
-            src_to_dst[:, 1],
-            skip_count=dst_sub_blocks_to_skip,
+            src_blocks,
+            src_block_size_factor,
+            src_to_dst[:, 0],
+            skip_count=src_sub_blocks_to_skip,
         )
+        expand_block_ids(dst_blocks, dst_block_size_factor, src_to_dst[:, 1])
 
         src_to_dst_tensor = torch.from_numpy(src_to_dst)
         event = self.events_pool.pop() if self.events_pool else torch.Event()