From 685c99ee77b4818dcdd15b30fe0e0eff0d5d22ec Mon Sep 17 00:00:00 2001 From: Yue Zhang <81500899+KevinCheung2259@users.noreply.github.com> Date: Sun, 2 Nov 2025 05:08:56 +0800 Subject: [PATCH] [KV offload] Offloading connector async scheduling support (#27648) Signed-off-by: KevinCheung2259 <2651309292@qq.com> Co-authored-by: Nick Hill --- .../kv_transfer/kv_connector/v1/offloading_connector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py index 19344e5784c23..7567c7fae5789 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py @@ -274,8 +274,8 @@ class OffloadingConnectorScheduler: if num_new_blocks <= 0: continue - num_gpu_blocks = num_blocks * self.block_size_factor - assert len(req.block_hashes) >= num_gpu_blocks + # NOTE: In async scheduling, placeholders may temporarily make + # len(req.block_hashes) < num_blocks * self.block_size_factor. new_block_hashes = self._get_block_hashes( req, start_idx=start_block_idx, end_idx=num_blocks