mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-21 05:27:03 +08:00
[KV offload] Offloading connector async scheduling support (#27648)
Signed-off-by: KevinCheung2259 <2651309292@qq.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
1e88fb751b
commit
685c99ee77
@@ -274,8 +274,8 @@ class OffloadingConnectorScheduler:
             if num_new_blocks <= 0:
                 continue
 
-            num_gpu_blocks = num_blocks * self.block_size_factor
-            assert len(req.block_hashes) >= num_gpu_blocks
+            # NOTE: In async scheduling, placeholders may temporarily make
+            # len(req.block_hashes) < num_blocks * self.block_size_factor.
 
             new_block_hashes = self._get_block_hashes(
                 req, start_idx=start_block_idx, end_idx=num_blocks
Loading…
x
Reference in New Issue
Block a user