Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 19:15:01 +08:00)
[Core][Perf] Only invoke save_new_computed_blocks when computed blocks are not empty (#27799)
Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
parent a8141fa649
commit 4b68c4a55b
@@ -306,11 +306,12 @@ class KVCacheManager:
|
|||||||
"Computed blocks should be empty when prefix caching is disabled"
|
"Computed blocks should be empty when prefix caching is disabled"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Append the new computed blocks to the request blocks until now to
|
if new_computed_block_list is not self.empty_kv_cache_blocks.blocks:
|
||||||
# avoid the case where the new blocks cannot be allocated.
|
# Append the new computed blocks to the request blocks until now to
|
||||||
self.coordinator.save_new_computed_blocks(
|
# avoid the case where the new blocks cannot be allocated.
|
||||||
request.request_id, new_computed_block_list
|
self.coordinator.save_new_computed_blocks(
|
||||||
)
|
request.request_id, new_computed_block_list
|
||||||
|
)
|
||||||
|
|
||||||
new_blocks = self.coordinator.allocate_new_blocks(
|
new_blocks = self.coordinator.allocate_new_blocks(
|
||||||
request.request_id, num_tokens_need_slot, num_encoder_tokens
|
request.request_id, num_tokens_need_slot, num_encoder_tokens
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ class SingleTypeKVCacheManager(ABC):
|
|||||||
num_tokens: The total number of tokens that need to be cached
|
num_tokens: The total number of tokens that need to be cached
|
||||||
(including tokens that are already cached).
|
(including tokens that are already cached).
|
||||||
"""
|
"""
|
||||||
num_cached_blocks = self.num_cached_block[request.request_id]
|
num_cached_blocks = self.num_cached_block.get(request.request_id, 0)
|
||||||
num_full_blocks = num_tokens // self.block_size
|
num_full_blocks = num_tokens // self.block_size
|
||||||
|
|
||||||
if num_cached_blocks >= num_full_blocks:
|
if num_cached_blocks >= num_full_blocks:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user