[KV offload] Offloading connector async scheduling support (#27648)

Signed-off-by: KevinCheung2259 <2651309292@qq.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
2026-03-20 02:41:19 +08:00 · 2025-11-02 05:08:56 +08:00 · 2025-11-02 05:08:56 +08:00 · 685c99ee77
commit 685c99ee77
parent 1e88fb751b
1 changed files with 2 additions and 2 deletions
--- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
@@ -274,8 +274,8 @@ class OffloadingConnectorScheduler:
            if num_new_blocks <= 0:
                continue

-            num_gpu_blocks = num_blocks * self.block_size_factor
-            assert len(req.block_hashes) >= num_gpu_blocks
+            # NOTE: In async scheduling, placeholders may temporarily make
+            # len(req.block_hashes) < num_blocks * self.block_size_factor.

            new_block_hashes = self._get_block_hashes(
                req, start_idx=start_block_idx, end_idx=num_blocks